PHP从给定网站抓取省市级城市数据

发布时间:2019-07-29编辑:脚本学堂
分享一例 PHP 代码,用于从给定的网站上抓取省、市两级的城市数据,有需要的朋友可以参考。

本节内容:
从给定网站抓取省市级城市数据

完整代码:
 

代码示例:

<?php
/**
 * 名称: 抓取省市级城市
 *
 * 功能: 从所给指定的网址中抓取数据并分析出自己想要的数据。
 * 编辑:www.jb200.com
 */
/*
SQL:
CREATE TABLE province_city (
  id int(11) unsigned NOT NULL auto_increment,
  city varchar(200) character set latin1 collate latin1_bin NOT NULL default '',
  parent_id smallint(4) unsigned NOT NULL default '0',
  city_code varchar(10) character set latin1 collate latin1_bin NOT NULL default '',
  PRIMARY KEY  (id),
  KEY parent_id (parent_id),
  KEY city_code (city_code)
) ENGINE=myisam
*/
/**
 * Returns the current Unix time as a float with microsecond resolution.
 *
 * microtime() without arguments yields the string "<fraction> <seconds>";
 * the two parts are converted to floats and summed.
 *
 * @return float Seconds since the Unix epoch, including the fractional part.
 */
function getMicrotime()
{
   $parts    = explode(" ", microtime());
   $fraction = (float)$parts[0];
   $seconds  = (float)$parts[1];
   return $fraction + $seconds;
}
/**
 * Extracts the link target from an opening anchor tag and scrapes that page.
 *
 * The value following the first "=" in the tag is taken as the relative URL
 * (this assumes the first attribute of the tag is href -- verify against the
 * markup of the page being scraped). Surrounding quotes are removed and, for
 * unquoted values, anything after the first whitespace is ignored so that
 * trailing attributes do not leak into the URL. The result is appended to
 * the global $pre base URL and passed to getDataFromUrl().
 *
 * @param string $data Opening tag, e.g. <a href=city.asp?id=1>
 * @return array Match groups as returned by getDataFromUrl(); five empty
 *               arrays when no usable URL can be found in $data.
 */
function filterData($data){
 global $pre;
 // Same shape as a preg_match_all() result with four capture groups and no hits.
 $no_matches = array(array(), array(), array(), array(), array());

 $eq_pos = strpos($data,"=");
 if ($eq_pos === false) {
  return $no_matches;
 }
 $start_len = $eq_pos+1;
 $end_len   = strpos($data,">",$start_len);
 if ($end_len === false) {
  return $no_matches;
 }

 $value = trim((string)substr($data,$start_len,$end_len-$start_len));
 if ($value === '') {
  return $no_matches;
 }

 if ($value[0] === '"' || $value[0] === "'") {
  // Quoted attribute value: take everything up to the matching quote.
  $close = strpos($value,$value[0],1);
  $url   = ($close === false) ? substr($value,1) : substr($value,1,$close-1);
 } else {
  // Unquoted attribute value ends at the first whitespace character.
  $url   = substr($value,0,strcspn($value," \t\r\n"));
 }
 if ($url === '' || $url === false) {
  return $no_matches;
 }

 return getDataFromUrl($pre.$url);
}
/**
 * Downloads a document and extracts every anchor element found in it.
 *
 * All tags except <a> are stripped first, then each <a ...>text</a> pair is
 * matched. The returned array follows the preg_match_all() default
 * (PREG_PATTERN_ORDER) layout:
 *   [0] full matches, [1] opening tags, [2] tag names,
 *   [3] inner text,   [4] closing tags.
 *
 * @param string $url URL or local path readable by file_get_contents().
 * @return array Five parallel arrays as described above; all empty when the
 *               document cannot be read or the pattern fails.
 */
function getDataFromUrl($url){
 $no_matches = array(array(), array(), array(), array(), array());

 $data  = file_get_contents($url);
 if ($data === false) {
  return $no_matches;
 }
 $data  = strip_tags($data,"<a>");

 // Single-quoted so the backslashes reach PCRE untouched. The inner group is
 // lazy so that several anchors on the same line are matched separately.
 $matches = array();
 if (preg_match_all('/(<(\w+)[^>]*>)(.*?)(<\/\2>)/', $data, $matches) === false) {
  return $no_matches;
 }
 return $matches;
}

// One HTTP request is made per province, so the default execution time
// limit is lifted for the duration of the scrape.
set_time_limit(0);
$startTime  = getMicrotime();

// The connection is opened before scraping because it is needed to escape
// the scraped city names for the INSERT statement.
$conn  = mysql_connect("localhost","root","");
if (!$conn) {
 die("MySQL connection failed: ".mysql_error());
}
if (!mysql_select_db("365tag",$conn)) {
 die(mysql_error($conn));
}

// $pre is read by filterData() through `global` to resolve relative links.
$pre   = "http://bjrd.beijing.gov.cn/life/life_com/code/";
$url   = "http://bjrd.beijing.gov.cn/life/life_com/code/city.asp";
$matches = getDataFromUrl($url);
$province_total = isset($matches[0]) ? count($matches[0]) : 0;

// One "(id,'city',parent_id)" tuple per row; joined into a single INSERT below.
$rows   = array();
// Running total of city rows emitted so far. It offsets the province ids so
// that every province and city receives a unique, consecutive id.
$id_num = 0;
for ($i=0; $i<$province_total; $i++) {
 $id   = $id_num+$i+1;
 echo "id_num: ".$id_num."<br>";
 echo "id:".$id."<br>";
 echo "<li><p>".($i+1).".".$matches[3][$i]."</p></li>";

 // Provinces are top-level entries: parent_id 0.
 $rows[] = "(".(int)$id.",'".mysql_real_escape_string($matches[3][$i],$conn)."',0)";

 // Follow the province link and collect its cities.
 $city = filterData($matches[1][$i]);
 $city_total = isset($city[0]) ? count($city[0]) : 0;
 for($k=0;$k<$city_total;$k++){
  $id_tmp = $id+$k+1;
  $rows[] = "(".(int)$id_tmp.",'".mysql_real_escape_string($city[3][$k],$conn)."',".(int)$id.")";

  echo ($k+1).".".$city[3][$k];
  echo "<br>";
 }
 echo "<hr>";
 $id_num  = $id_num+$city_total;
}

// No trailing semicolon: mysql_query() expects a single bare statement.
$sql   = "INSERT INTO province_city (id,city, parent_id) VALUES ".implode(",",$rows);
echo "SQL:<BR>".$sql;
echo "<hr>";

$endTime  = getMicrotime();
$execTime = $endTime-$startTime;
echo "<font size=2 color=blue>抓取及分析数据所用时间:".$execTime."</font>";

// Write the collected rows to the database. Skipped when nothing was
// scraped, because an INSERT without any VALUES tuple is invalid SQL.
if (count($rows) > 0) {
 mysql_query($sql,$conn) or die(mysql_error($conn));
}
?>