以下代码实现如下功能:
抓取网页上的更新,然后把更新内容写入数据库。
#!/usr/bin/perl -w
# Download the front page of $url, extract the dated update entries from it,
# report the entries that were not in the snapshot left by the previous run,
# and optionally insert those new entries into the MySQL table `web`.
#
# Files used in the current directory:
#   index.html  - the downloaded page
#   result      - snapshot (one "date<TAB>title<TAB>link" line per entry)
#                 written by the previous run
#   new_result  - snapshot built by this run; replaces `result` at the end
use strict;
use warnings;
use LWP;
use Encode;
use DBI;

my $browser    = LWP::UserAgent->new;
my $url        = 'http://www.jb200.com/';
my $file       = 'index.html';
my $result     = 'result';        # the old snapshot
my $new_result = 'new_result';    # the snapshot built by this run

print scalar(localtime), ":Now getting web,please wait.......\n";

# ':content_file' makes LWP store the response body directly in $file.
my $response = $browser->get($url, ':content_file' => $file);
die "can't get $url -- ", $response->status_line, "\n"
    unless $response->is_success;
die "can't get $url --", $response->content_type
    unless $response->content_type eq 'text/html';

print 'Done.Now,analyzing......', "\n";

# ---------------------------------------------------------------------------
# Step 1: turn the page into a snapshot file, one tab-separated line per entry.
# ---------------------------------------------------------------------------
# NOTE: `or` (not `||`) is required here; `||` would bind to the file name
# and the die would never fire.
open my $page_fh, '<', $file or die "can't open $file :$!\n";
open my $snapshot_fh, '>', $new_result
    or die "can't open $new_result for write:$!\n";

# The date and the title/link of one entry are matched on separate lines of
# the page, so the date is held until its title line shows up.
my $pending_date = '';
while (my $line = <$page_fh>) {
    $line =~ s/\t//;

    if ($line =~ /<tr><td.*?>(\d{4}-\d{1,2}-\d{1,2})/i) {
        $pending_date = $1;
        next;
    }

    if ($line =~ /<td\s+width="48%".*?title="(.*?)".*?=(.*?)\s/i) {
        # Copy the captures before calling into Encode.
        my ($raw_title, $raw_link) = ($1, $2);

        # The page text is treated as GB2312; the snapshot is stored as UTF-8.
        my $title = encode('utf8', decode('gb2312', $raw_title));
        my $link  = encode('utf8', decode('gb2312', $raw_link));

        # Tab is the field separator of the snapshot, so it must not appear
        # inside a field.
        $title =~ tr/\t/ /;

        print {$snapshot_fh} join("\t", $pending_date, $title, $link), "\n";
        $pending_date = '';
    }
}
close $page_fh;
# Buffered write errors only surface at close time.
close $snapshot_fh or die "can't write $new_result:$!\n";

# ---------------------------------------------------------------------------
# Step 2: find the lines of the new snapshot that the old one did not have.
# ---------------------------------------------------------------------------
# The old snapshot does not exist on the very first run; every entry then
# counts as new.
my %seen_before;
if (-e $result) {
    open my $old_fh, '<', $result or die "can't open $result:$!\n";
    while (my $old_line = <$old_fh>) {
        $seen_before{$old_line} = 1;
    }
    close $old_fh;
}

# New entries keyed by title: $web{$title} = { date => ..., site => ... }.
my %web;
open my $new_fh, '<', $new_result or die "can't open $new_result:$!\n";
while (my $entry = <$new_fh>) {
    next if $seen_before{$entry};

    # Drop the line terminator so it does not end up inside the link.
    chomp $entry;
    my ($date, $title, $site) = split /\t/, $entry;
    next unless defined $site;    # skip malformed lines

    $web{$title} = { date => $date, site => $url . $site };
}
close $new_fh;

# ---------------------------------------------------------------------------
# Step 3: the new snapshot becomes the reference for the next run.
# ---------------------------------------------------------------------------
# NOTE(review): this happens before the database step, as in the original
# flow, so entries are not reported again if the insert is declined or fails.
if (-e $result) {
    unlink $result or die "can't remove $result:$!\n";
}
rename $new_result, $result or die "can't rename:$!\n";

unless (%web) {
    print "Not Found\n";
    exit(0);
}

for my $title (sort keys %web) {
    print $title, "\t", $web{$title}{date}, "\t", $web{$title}{site}, "\n";
}

# ---------------------------------------------------------------------------
# Step 4: optionally store the new entries in the database.
# ---------------------------------------------------------------------------
print 'Do you want to write to the database:[Y/N]';
my $choose = <STDIN>;
$choose = '' unless defined $choose;    # EOF on STDIN counts as "no"
chomp $choose;

if (lc($choose) eq 'y') {
    # Connect to the database.
    # NOTE(review): credentials are hard-coded in the script; consider moving
    # them to a config file or the environment.
    my $database = 'DBI:mysql:database=wei;host=127.0.0.1';
    my $user     = 'root';
    my $pw       = '123456';
    my $dbh = DBI->connect($database, $user, $pw, { RaiseError => 1 })
        or die "can't connect to the database:$DBI::errstr\n";

    # Titles and links come from a web page, so they are bound through
    # placeholders rather than interpolated into the SQL text.
    my $sth = $dbh->prepare('insert into web(title,date,site) values(?,?,?)');
    for my $title (sort keys %web) {
        $sth->execute($title, $web{$title}{date}, $web{$title}{site});
    }
    $dbh->disconnect();
}
exit;