Perl 抓取网页并写入数据库的例子

发布时间:2019-07-26 编辑:脚本学堂
以下代码实现如下的功能:抓取网页上的更新,然后把更新内容写进数据库中。

以下代码实现如下的功能:
抓取网页上的更新(与上一次抓取保存的结果比较,找出新增的条目),然后把更新内容写进数据库中。
 

复制代码 代码如下:

#!/usr/bin/perl -w
# Fetch a web page, extract its dated update entries, compare them with the
# entries saved by the previous run, print the new ones and optionally insert
# them into a MySQL table.
#
# Files used in the current directory:
#   index.html  - raw copy of the downloaded page
#   result      - entries extracted by the previous run (absent on first run)
#   new_result  - entries extracted by this run; renamed to "result" at the end
#
# Each line of result/new_result has the form: date <TAB> title <TAB> link
use strict;
use warnings;
use LWP::UserAgent;
use Encode;
use DBI;

my $browser    = LWP::UserAgent->new;
my $url        = 'http://www.jb200.com/';
my $file       = 'index.html';
my $result     = 'result';        # snapshot written by the previous run
my $new_result = 'new_result';    # snapshot written by this run

# ---------------------------------------------------------------- download
print scalar(localtime), ":Now getting web,please wait.......\n";
my $response = $browser->get($url, ':content_file' => $file);
die "can't get $url -- ", $response->status_line, "\n"
    unless $response->is_success;
die "can't get $url --", $response->content_type
    unless $response->content_type eq 'text/html';
print 'Done.Now,analyzing......', "\n";

# ----------------------------------------------------------------- extract
# `or` (not `||`) is required here: `||` would bind to the file name and the
# failure of open() would never be noticed.
open my $page_fh, '<', $file
    or die "can't open $file :$!\n";
open my $out_fh, '>', $new_result
    or die "can't open $new_result for write:$!\n";

while (my $line = <$page_fh>) {
    $line =~ s/\t//;

    # A row that starts an entry: only the date is on this line, so write it
    # followed by a tab and let the title/link line complete the record.
    if ($line =~ /<tr><td.*?>(\d{4}-\d{1,2}-\d{1,2})/i) {
        # The capture is digits and hyphens only, so no transcoding is needed.
        print {$out_fh} $1, "\t";
        next;
    }

    # The cell holding the title attribute and the link of the entry.
    if ($line =~ /<td\s+width="48%".*?title="(.*?)".*?=(.*?)\s/i) {
        my ($raw_title, $raw_link) = ($1, $2);

        # The page bytes are treated as GB2312; the snapshot is stored as UTF-8.
        my $title = decode('gb2312', $raw_title);
        my $link  = decode('gb2312', $raw_link);

        # Tab is the field separator of the snapshot file; keep it out of the
        # title so that the record still splits into exactly three fields.
        $title =~ tr/\t/ /;

        print {$out_fh} encode('utf8', $title), "\t",
                        encode('utf8', $link),  "\n";
    }
}
close $page_fh;
close $out_fh or die "can't close $new_result:$!\n";

# ----------------------------------------------------------------- compare
# Load the previous snapshot into a hash for O(1) membership tests.  On the
# very first run there is no previous snapshot, so every entry counts as new.
my %seen_before;
if (-e $result) {
    open my $old_fh, '<', $result
        or die "can't open $result :$!\n";
    while (my $old_entry = <$old_fh>) {
        chomp $old_entry;
        $seen_before{$old_entry} = 1;
    }
    close $old_fh;
}

# Collect the entries that were not present last time, keyed by title.
my %web;
open my $new_fh, '<', $new_result
    or die "can't open $new_result :$!\n";
while (my $entry = <$new_fh>) {
    chomp $entry;    # otherwise the link would carry a trailing newline
    next if $seen_before{$entry};

    my ($date, $title, $site) = split /\t/, $entry;
    unless (defined $site) {
        warn "skipping malformed entry: $entry\n";
        next;
    }
    $web{$title} = { date => $date, site => $url . $site };
}
close $new_fh;

for my $title (sort keys %web) {
    print $title, "\t", $web{$title}{date}, "\t", $web{$title}{site}, "\n";
}

# ------------------------------------------------------------------ rotate
# The snapshot of this run becomes the reference for the next run.
if (-e $result) {
    unlink $result or die "can't remove $result:$!\n";
}
rename $new_result, $result or die "can't rename:$!\n";

# Decide by content, not by file size: two different snapshots can have the
# same size, and stat() on a missing file yields undef.
unless (%web) {
    print "Not Found\n";
    exit(0);
}

# ---------------------------------------------------------------- database
print 'Do you wang to write to the database:[Y/N]';
my $choose = <STDIN>;
$choose = '' unless defined $choose;    # EOF on STDIN counts as "no"
chomp $choose;

if (lc($choose) eq 'y') {
    # NOTE(review): connection settings are hard-coded; consider moving them
    # to a configuration file or the environment.
    my $database = 'DBI:mysql:database=wei;host=127.0.0.1';
    my $user     = 'root';
    my $pw       = '123456';
    my $dbh = DBI->connect($database, $user, $pw, { 'RaiseError' => 1 })
        or die "can't connect to the database:$DBI::errstr\n";

    # Placeholders keep scraped text (which may contain quotes) from being
    # interpreted as SQL; the statement is prepared once and reused.
    my $sth = $dbh->prepare('insert into web(title,date,site) values(?,?,?)');
    for my $title (sort keys %web) {
        $sth->execute($title, $web{$title}{date}, $web{$title}{site});
    }
    $dbh->disconnect();
}
exit;