php 爱站关键词采集器的示例代码

发布时间:2020-07-31编辑:脚本学堂
分享一个 PHP 示例代码，用于采集爱站提供的关键词。这是一个小巧的爱站关键词采集器，有需要的朋友可以参考。

本节内容:
PHP 爱站关键词采集器。

例子:
 

复制代码 代码示例:
<?php
/**
 * Aizhan keyword scraping tool.
 * by www.jb200.com
 *
 * Reads the keyword from the "word" query-string parameter and defines the
 * account, login URL and User-Agent used by the requests that follow.
 */
header("Content-type: text/html; charset=utf-8");

// Keyword to look up. A missing parameter or a non-string value
// (e.g. ?word[]=x arrives as an array) becomes an empty string instead of
// raising an "undefined index" notice or breaking urlencode() later on.
$word = (isset($_GET['word']) && is_string($_GET['word'])) ? $_GET['word'] : '';

$username = "****@163.com"; // your Aizhan account
$passwd = "***"; // your Aizhan password
$login_url = "http://www.aizhan.com/login.php";
$user_agent = "Mozilla/5.0 (Windows NT 6.2; rv:17.0) Gecko/20100101 Firefox/17.0";
 
// Step 1: fetch the login page anonymously so the server issues a session id.
$session_header = [
    "Host: www.aizhan.com",
    "User-Agent: " . $user_agent,
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3",
    "Accept-Encoding: gzip, deflate",
    "Connection: keep-alive",
];

// Cookie jar file used by the requests in this script. tempnam() may fall
// back to the system temp directory when ./temp does not exist or is not
// writable.
$ckfile = tempnam("./temp", "tem");

// The jar ends up holding session/login cookies, so delete it when the
// script finishes instead of leaving it on disk.
register_shutdown_function(function () use ($ckfile) {
    // Release a handle that may still be alive first: cURL writes the jar
    // when the handle is destroyed, which would otherwise recreate the file
    // after it has been removed.
    unset($GLOBALS['curl']);
    if (is_string($ckfile) && is_file($ckfile)) {
        unlink($ckfile);
    }
});

$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $login_url);
curl_setopt($curl, CURLOPT_USERAGENT, $user_agent);
curl_setopt($curl, CURLOPT_HTTPHEADER, $session_header);
curl_setopt($curl, CURLOPT_ENCODING, 'gzip, deflate');
curl_setopt($curl, CURLOPT_HEADER, 1); // keep response headers in $html
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_COOKIEJAR, $ckfile);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_TIMEOUT, 15);
$html = curl_exec($curl);
if ($html === false) {
    // Best effort: record why the request failed, then carry on as before.
    error_log('aizhan: session request failed: ' . curl_error($curl));
}
curl_close($curl);
// Since PHP 8.0 curl_close() no longer destroys the handle, and the cookie
// jar is only written once the handle is destroyed. Drop the handle now so
// the jar is on disk before it is read.
unset($curl);
 
/**
 * Look up one cookie's value in a cookie jar file written by cURL.
 *
 * Each line is split on runs of whitespace; the token that follows the one
 * equal to $name is taken as the value. When several lines match, the last
 * one wins.
 *
 * @param mixed  $jar_path Path of the cookie jar file.
 * @param string $name     Exact (case-sensitive) cookie name.
 * @return string|null The cookie value, or null when the file cannot be
 *                     read or the cookie is absent.
 */
function aizhan_jar_cookie_value($jar_path, $name)
{
    if (!is_string($jar_path) || !is_file($jar_path) || !is_readable($jar_path)) {
        return null;
    }
    $lines = file($jar_path);
    if ($lines === false) {
        return null;
    }

    $value = null;
    foreach ($lines as $line) {
        // "\s+" splits on whitespace; the previous "[s]+" split on the
        // letter "s" and therefore never isolated the cookie name.
        $fields = preg_split('/\s+/', $line);
        // Strict search so that a hit at index 0 is not mistaken for "not found".
        $pos = array_search($name, $fields, true);
        if ($pos !== false && isset($fields[$pos + 1])) {
            $value = $fields[$pos + 1];
        }
    }
    return $value;
}

// Pull the session id issued for the anonymous request out of the cookie jar.
$session_id = aizhan_jar_cookie_value(isset($ckfile) ? $ckfile : null, 'PHPSESSID');
if ($session_id !== null) {
    $cookie['PHPSESSID'] = $session_id;
}
unset($session_header);
unset($curl);

// Cookie header value for the login request; empty id when none was found.
$cookie_str = "PHPSESSID=" . (isset($cookie['PHPSESSID']) ? $cookie['PHPSESSID'] : '');
 
// Step 2: log in to aizhan, presenting the session id obtained earlier.
$login_header = [
    "Host: www.aizhan.com",
    "User-Agent: " . $user_agent,
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3",
    "Accept-Encoding: gzip, deflate",
    "Connection: keep-alive",
    "Referer: http://www.aizhan.com/login.php",
    "Cookie: " . $cookie_str,
    "Content-Type: application/x-www-form-urlencoded",
];
$login_post = ['refer' => '', 'email' => $username, 'password' => $passwd];

// Build the form body with proper URL encoding, so that characters such as
// "&", "=", "+" or "%" in the e-mail or password cannot corrupt the request.
$post_str = http_build_query($login_post, '', '&');

$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, $login_url);
curl_setopt($curl, CURLOPT_USERAGENT, $user_agent);
curl_setopt($curl, CURLOPT_HTTPHEADER, $login_header);
curl_setopt($curl, CURLOPT_POST, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $post_str);
curl_setopt($curl, CURLOPT_ENCODING, 'gzip, deflate');
curl_setopt($curl, CURLOPT_HEADER, 1); // keep response headers in $html
curl_setopt($curl, CURLOPT_AUTOREFERER, true);
curl_setopt($curl, CURLOPT_COOKIEJAR, $ckfile);  // save cookies set by the login response
curl_setopt($curl, CURLOPT_COOKIEFILE, $ckfile); // load cookies saved so far
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_TIMEOUT, 15);
$html = curl_exec($curl);
if ($html === false) {
    // Best effort: record why the request failed, then carry on as before.
    error_log('aizhan: login request failed: ' . curl_error($curl));
}
curl_close($curl);
// Since PHP 8.0 curl_close() no longer destroys the handle, and the cookie
// jar is only written once the handle is destroyed. Drop the handle now so
// the login cookies are on disk before the jar is read.
unset($curl);
 
/**
 * Read selected cookies from a cookie jar file written by cURL.
 *
 * Each line is split on runs of whitespace; for every requested name, the
 * token that follows the one equal to that name is taken as its value.
 * When several lines carry the same name, the last one wins.
 *
 * @param mixed    $jar_path Path of the cookie jar file.
 * @param string[] $names    Exact (case-sensitive) cookie names to look for.
 * @return array Map of cookie name => value, holding only the names found.
 *               Empty when the file cannot be read.
 */
function aizhan_read_jar_cookies($jar_path, array $names)
{
    $found = [];
    if (!is_string($jar_path) || !is_file($jar_path) || !is_readable($jar_path)) {
        return $found;
    }
    $lines = file($jar_path);
    if ($lines === false) {
        return $found;
    }

    foreach ($lines as $line) {
        // "\s+" splits on whitespace; the previous "[s]+" split on the
        // letter "s" and therefore never isolated the cookie names.
        $fields = preg_split('/\s+/', $line);
        foreach ($names as $name) {
            // Strict search so that a hit at index 0 is not mistaken for "not found".
            $pos = array_search($name, $fields, true);
            if ($pos !== false && isset($fields[$pos + 1])) {
                $found[$name] = $fields[$pos + 1];
            }
        }
    }
    return $found;
}

// Collect the cookies present in the jar after the login request.
if (!isset($cookie) || !is_array($cookie)) {
    $cookie = [];
}
$cookie = array_merge(
    $cookie,
    aizhan_read_jar_cookies(
        isset($ckfile) ? $ckfile : null,
        ['PHPSESSID', 'userId', 'userName', 'userGroup', 'userSecure']
    )
);

// Cookie header value for the keyword query. A cookie that was not found is
// sent with an empty value rather than triggering an undefined-index notice.
$cookie_pairs = [];
foreach (['userId', 'userName', 'userGroup', 'userSecure'] as $cookie_name) {
    $cookie_pairs[] = $cookie_name . '=' . (isset($cookie[$cookie_name]) ? $cookie[$cookie_name] : '');
}
$cookie_str = implode('; ', $cookie_pairs);
 
//echo $cookie_str;
//echo $ckfile;
//exit();
 
// Step 3: request the keyword page on ci.aizhan.com. The login cookies are
// sent by hand through the Cookie header; the jar file is written to but not
// loaded for this request.
$enword = urlencode($word);
$target_url = "http://ci.aizhan.com/{$enword}/";

$search_header = array (
    "Host: ci.aizhan.com",
    "User-Agent: " . $user_agent,
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3",
    "Accept-Encoding: gzip, deflate",
    "Connection: keep-alive",
    "Cookie: " . $cookie_str,
);

$curl = curl_init ();
curl_setopt_array ( $curl, array (
    CURLOPT_URL            => $target_url,
    CURLOPT_USERAGENT      => $user_agent,
    CURLOPT_HTTPHEADER     => $search_header,
    CURLOPT_ENCODING       => 'gzip, deflate',
    CURLOPT_HEADER         => 1, // response headers are kept in $html
    CURLOPT_AUTOREFERER    => true,
    CURLOPT_COOKIEJAR      => $ckfile,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_TIMEOUT        => 15,
) );
$html = curl_exec ( $curl );
curl_close ( $curl );
 
/**
 * Extract keyword rows from the HTML returned by the keyword page.
 *
 * A row is recognised by a first cell of the form
 * <td class="blue t_l"><a href="http://www.baidu.com/baidu?word=...">, and
 * runs up to the next </tr>. The tag-stripped text of the row's first three
 * <td> cells becomes 'word', 'index' and 'record'.
 *
 * @param mixed $html Response body; anything that is not a non-empty string
 *                    (e.g. false from a failed curl_exec()) yields no rows.
 * @return array List of ['word' => string, 'index' => string, 'record' => string].
 */
function aizhan_parse_keyword_rows($html)
{
    $rows = [];
    if (!is_string($html) || $html === '') {
        return $rows;
    }

    // "#" is used as the delimiter because the pattern itself contains "/";
    // with "/" as delimiter the unescaped slashes made the pattern fail to
    // compile. "?" and "." in the URL are escaped so they match literally.
    $row_pattern = '#<td class="blue t_l"><a href="http://www\.baidu\.com/baidu\?word=(.*)</tr>#Uis';
    if (!preg_match_all($row_pattern, $html, $row_matches)) {
        return $rows;
    }

    foreach ($row_matches[0] as $row_html) {
        preg_match_all('#<td(.*)</td>#Uis', $row_html, $cell_matches);
        $cells = array_map('strip_tags', $cell_matches[0]);
        // A row with fewer than three cells gets empty strings for the rest.
        $rows[] = [
            'word'   => isset($cells[0]) ? $cells[0] : '',
            'index'  => isset($cells[1]) ? $cells[1] : '',
            'record' => isset($cells[2]) ? $cells[2] : '',
        ];
    }
    return $rows;
}

$result = aizhan_parse_keyword_rows(isset($html) ? $html : '');
if (empty($result)) {
    echo "error";
}
print_r($result);
?>

大家可以保存以上代码，在自己的环境中实际测试一下，看看其关键词采集功能是否好用。

>>> 您可能感兴趣的文章:
phpQuery采集网页内容的示例代码
PHP采集远程图片的实例代码
PHP采集程序常用的函数代码
PHP 采集图片函数一例
PHP采集器的简单示例代码
phpQuery采集网页的实例分享
php curl采集Discuz的代码实例
php采集远程图片的思路与实现代码
php采集程序代码(入门)
一个php文本采集类
一个比较全面的截取函数(多用于采集内容的分析)