调用方法
// Reject requests whose User-Agent is classified as a crawler.
if ($this->isCrawler()) {
    $this->ajaxReturn(['status' => '0', 'info' => '爬虫访问']);
}

// Enforce the per-user call-rate limit; a falsy result means the request is refused.
$result = $this->api_frequency_visits(UID);
if (!$result) {
    $this->ajaxReturn(['status' => '0', 'info' => '访问次数过多']);
}
/**
 * Decide whether the current request appears to come from a crawler or a script.
 *
 * Reads the User-Agent header from $_SERVER. A missing or empty header is
 * classified as a crawler, as is any header containing one of the known
 * crawler names or starting with a known scripted-client prefix.
 *
 * @return bool true when the request is classified as a crawler, false otherwise
 */
function isCrawler()
{
    // The header can be absent entirely; guard the read so no undefined-index notice is raised.
    $userAgent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    if (!$userAgent) {
        return true;
    }

    $knownCrawlers = [
        'Googlebot',
        'Bingbot',
        'Slurp',
        'DuckDuckBot',
        'Baidu',
        'Yahoo',
        'Yandex',
    ];
    foreach ($knownCrawlers as $crawler) {
        // Case-insensitive on purpose: real crawlers do not all use the capitalisation
        // listed above (Bing, for example, announces itself as "bingbot/2.0").
        if (stripos($userAgent, $crawler) !== false) {
            return true;
        }
    }

    // Default User-Agent prefixes sent by common Python HTTP clients.
    $scriptPrefixes = [
        'python-requests/',
        'Python-urllib/',
    ];
    foreach ($scriptPrefixes as $prefix) {
        if (strncasecmp($userAgent, $prefix, strlen($prefix)) === 0) {
            return true;
        }
    }

    return false;
}

/**
 * Check and record how often a user has called the API within a time window.
 *
 * Hits are stored in the Redis hash "user:{uid}:api:frequency", one field per
 * Unix second holding the number of hits in that second. Fields older than the
 * window are removed on every call.
 *
 * NOTE(review): reading the counters and incrementing them are separate Redis
 * commands, so concurrent requests from the same user can slightly exceed the limit.
 * NOTE(review): the result of connect() is not checked; behaviour when the
 * server is unreachable depends on the phpredis version - confirm.
 *
 * @param mixed $uid           user identifier, interpolated into the Redis key
 * @param int   $max_frequency maximum number of hits allowed inside the window
 * @param int   $limit_time    window length in seconds
 * @return bool|int false when the limit has been reached; otherwise the value
 *                  returned by hIncrBy (the hit count for the current second)
 */
function api_frequency_visits($uid, $max_frequency = 10, $limit_time = 60)
{
    $key = "user:{$uid}:api:frequency";

    $redis = new Redis();
    $redis->connect('127.0.0.1');

    $data = $redis->hGetAll($key);
    if (!is_array($data)) {
        // Do not iterate a non-array reply.
        $data = [];
    }

    $now_time = time();
    $oldest_allowed = $now_time - $limit_time;

    // Fields that have fallen out of the window and must be removed.
    $del_key = [];
    // Hits recorded inside the window.
    $total = 0;

    foreach ($data as $time => $count) {
        if ((int) $time < $oldest_allowed) {
            $del_key[] = $time;
        } else {
            $total += (int) $count;
        }
    }

    if ($del_key) {
        $redis->hDel($key, ...$del_key);
    }

    if ($total >= $max_frequency) {
        return false;
    }

    $current = $redis->hIncrBy($key, $now_time, 1);

    // Let Redis drop the whole hash once every field in it is stale, so keys of
    // idle users do not accumulate. The extra second keeps a field that sits
    // exactly on the window boundary, which the loop above still counts.
    $redis->expire($key, (int) $limit_time + 1);

    return $current;
}
标签:return,redis,爬虫,frequency,key,time,userAgent,方法 From: https://www.cnblogs.com/zinging/p/18196218