php抓取google关键词排名的实例代码

发布时间:2020-06-23编辑:脚本学堂
分享一例php获取google关键词排名的代码,利用PHP的curl函数储存cookie来实现,感兴趣的朋友可以参考下。

本节内容:
抓取google关键词排名

实现思路:
使用PHP的curl函数保存cookie。google搜索页面无法直接用file_get_contents打开,必须完全模拟浏览器(User-Agent、cookie等)才行。
百度则可以直接用file_get_contents抓取页面,然后用正则表达式处理即可。

例子:
 

复制代码 代码示例:

<?php
// Declare the response as UTF-8 HTML; the messages echoed below contain Chinese text.
header("Content-Type: text/html;charset=utf-8");

/**
 * Look up where a site ranks on Google for a keyword and echo the result as HTML.
 *
 * Result pages are fetched one at a time, starting at $page and stopping after
 * page 10 (10 results per page). As soon as a result title link containing
 * $url_s is found, its overall rank, page and position on the page are printed
 * and the function returns. If nothing is found, a "not ranked within 10 pages"
 * message is printed instead. A page that cannot be fetched is treated as a page
 * without a hit, so the search continues with the next page.
 *
 * @param string $url_s   Site fragment (e.g. "example.com") searched for inside the
 *                        markup of each result title link.
 * @param string $keyword Search keyword; URL-encoded for the request and
 *                        HTML-escaped before being echoed.
 * @param int    $page    1-based result page to start from.
 * @return void Output is echoed directly.
 */
function ggsearch($url_s, $keyword, $page = 1) {
    $enKeyword = urlencode($keyword);
    // The keyword is user input: escape it before it is written into the page.
    $safeKeyword = htmlspecialchars($keyword, ENT_QUOTES, 'UTF-8');
    $needle = (string) $url_s;

    // An empty needle cannot identify a site, so no page is fetched for it.
    while ($needle !== '' && $page <= 10) {
        $page_num = ($page - 1) * 10; // offset of the first result on this page
        $url = "http://www.google.com/search?q=$enKeyword&hl=en&prmd=imvns&ei=JPnJTvLFI8HlggeXwbRl&start=$page_num&sa=N";

        $contents = ggsearch_fetch($url);
        $titles = ($contents === false) ? array() : ggsearch_titles($contents);

        foreach ($titles as $i => $title) {
            if (strpos($title, $needle) === false) {
                continue;
            }

            $j = $i + 1;            // position on this page
            $sum = $j + $page_num;  // overall rank

            // $title is markup taken from the fetched page and is echoed as-is so the
            // result link stays clickable.
            echo "关键字" . $safeKeyword . "<br>" . "排名:" . '<font color="red" size="20" >' . $sum . '</font>' . "####" . "第" . '<font color="#00FFFF" size="18" >' . $page . '</font>' . " 页" . "第" . '<font color="#8000FF" size="15" >' . $j . '</font>' . "名" . $title . "<br>";
            echo "<a href='" . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . "'>" . "点击搜索结果" . "</a>" . "<br>";
            echo "<hr>";

            return; // report the first hit only
        }

        $page++;
    }

    echo '关键字' . $safeKeyword . '10页之内没有该网站排名' . '<br>';
    echo "<hr>";
}

/**
 * Fetch one search result page with cURL, presenting a browser-like client.
 *
 * @param string $url Fully built search URL.
 * @return string|false Response body, or false when the request failed.
 */
function ggsearch_fetch($url) {
    // Pick one of several aliased interfaces at random to avoid the IP being blocked.
    // NOTE(review): assumes eth0:1 .. eth0:4 exist on this host - confirm.
    $interface = "eth0:" . rand(1, 4);
    // Cookie storage. NOTE(review): assumes ./temp exists and is writable - confirm.
    $cookie_file = dirname(__FILE__) . "/temp/google.txt";

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5");
    curl_setopt($ch, CURLOPT_INTERFACE, $interface);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // Send previously stored cookies as well as saving new ones; without
    // CURLOPT_COOKIEFILE the jar is written but never read back.
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);

    $contents = curl_exec($ch);
    curl_close($ch);

    return is_string($contents) ? $contents : false;
}

/**
 * Extract the result title links from a fetched result page, in page order.
 *
 * @param string $contents HTML of the result page.
 * @return array List of matched `<h3 class="r"><a ...>...</a>` fragments.
 */
function ggsearch_titles($contents) {
    $titles = array();

    // "~" is used as delimiter because the pattern itself contains "!" (in "<!--z-->").
    if (!preg_match_all('~<div\s*id="search">(.*)</div>\s+<!--z-->~', $contents, $blocks)) {
        return $titles;
    }

    foreach ($blocks[0] as $block) {
        if (preg_match_all('~<h3\s+class="r"><a[^>]+>(.*?)</a>~', $block, $found)) {
            foreach ($found[0] as $fragment) {
                $titles[] = $fragment;
            }
        }
    }

    return $titles;
}

/**
 * Split the raw textarea input into a list of keywords.
 *
 * Keywords may be separated by line breaks, by "|", or by a mix of both.
 * Surrounding whitespace is trimmed and empty entries are dropped.
 *
 * @param string $raw Raw textarea content.
 * @return array Non-empty keywords in input order.
 */
function gg_parse_keywords($raw) {
    $keywords = array();
    foreach (preg_split('/[\r\n|]+/', $raw) as $part) {
        $part = trim($part);
        if ($part !== '') {
            $keywords[] = $part;
        }
    }
    return $keywords;
}

/**
 * Reduce a user-supplied site address to the fragment used for matching results.
 *
 * Only a leading scheme ("http://", "https://", ...) and trailing slashes are removed;
 * the host is otherwise left as typed, so the fragment can only match more result
 * links than the untouched input would, never fewer.
 *
 * @param string $site Address as typed, e.g. " http://www.example.com/ ".
 * @return string e.g. "www.example.com".
 */
function gg_normalize_site($site) {
    $site = preg_replace('#^[a-z][a-z0-9+.-]*://#i', '', trim($site));
    return rtrim($site, '/');
}

// Handle a submitted form: look up every keyword for the given site, then print the run time.
if (!empty($_POST['submit'])) {
    $start = microtime(true);

    // Accept plain string fields only; anything else is treated as missing.
    $more_key = (isset($_POST['textarea']) && is_string($_POST['textarea'])) ? trim($_POST['textarea']) : '';
    $url_s = (isset($_POST['url']) && is_string($_POST['url'])) ? gg_normalize_site($_POST['url']) : '';

    if ($more_key !== '' && $url_s !== '') {
        foreach (gg_parse_keywords($more_key) as $keyword) {
            ggsearch($url_s, $keyword);
        }

        echo '<hr>';
        echo '程序运行时间: ';
        echo microtime(true) - $start;
    }
}
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>抓取排名 - www.jb200.com</title>
</head>
<body>
<!--
  Search form. It posts back to this same script (empty action), which reads the
  "textarea", "url" and "submit" fields from $_POST.
  NOTE(review): the textarea is pre-filled with format hints; unless the user clears
  them, they are submitted as part of the keyword input.
-->
<form action="" method="post">
<span>关键字:</span> <textarea name="textarea" rows="20" cols="40" wrap="off">
格式例如:keyword1|keyword2|keyword3
  或者:   keyword1
          keyword2
          keyword3
  </textarea>
       <span>url地址:</span><input type="text" name="url">
      <input type="submit" name="submit" value="搜索">
</form>
</body>
</html>