perl在处理文件方面的能力还是很强的,处理行数较少的文件直接循环遍历就可以了,对于较大的文件则有些吃力。
不过可以借助perl的一些模块来实现,大致解决方案分为三种:DB_File、Tie::File(Tie:File:AsHash)、然后就是DBI(先将数据导入数据库,然后从数据库中读取文件)。
本人认为用数据库存取数据时比较理想的方法,先来看如下的示例。
#!/usr/bin/perl -w
use strict;
use DBI;
die "n",&die(),"n" unless @ARGV==7;
my ($tissue,$cpg_forw,$cpg_rev,$chg_forw,$chg_rev,$chh_forw,$chh_rev)=@ARGV;
my ($driver,$dsn,$root,$psword)=("mysql","database=methylome","root","123456");
my $dbh=DBI->connect("dbi:$driver:$dsn",$root,$psword) or die "$DBI::errstr";
$dbh->do(qq(create table forw_CpG_$tissue(chrom VARCHAR(11),pos1 INT,pos2 INT,depth INT,meth_lev float)));
$dbh->do(qq(load data local infile '$cpg_forw' into table forw_CpG_$tissue));
$dbh->do(qq(create table rev_CpG_$tissue(chrom VARCHAR(11),pos1 INT,pos2 INT,depth INT,meth_lev float)));
$dbh->do(qq(load data local infile '$cpg_rev' into table rev_CpG_$tissue));
$dbh->do(qq(create table forw_CHG_$tissue(chrom VARCHAR(11),pos1 INT,pos2 INT,depth INT,meth_lev float)));
$dbh->do(qq(load data local infile '$chg_forw' into table forw_CHG_$tissue));
$dbh->do(qq(create table rev_CHG_$tissue(chrom VARCHAR(11),pos1 INT,pos2 INT,depth INT,meth_lev float)));
$dbh->do(qq(load data local infile '$chg_rev' into table rev_CHG_$tissue));
$dbh->do(qq(create table forw_CHH_$tissue(chrom VARCHAR(11),pos1 INT,pos2 INT,depth INT,meth_lev float)));
$dbh->do(qq(load data local infile '$chh_forw' into table forw_CHH_$tissue));
$dbh->do(qq(create table rev_CHH_$tissue(chrom VARCHAR(11),pos1 INT,pos2 INT,depth INT,meth_lev float)));
$dbh->do(qq(load data local infile '$chh_rev' into table rev_CHH_$tissue));
$dbh->disconnect();
sub die{
my $die=<<DIE;
perl *.pl <Tissue_name> <forw_CpG> <rev_CpG> <Forw_CHG> <Rev_CHG> <Forw_CHH> <Rev_CHH>
DIE
return $die;
}
#--------------------------
#!/usr/bin/perl -w
use strict;use DBI;
my $startTime=localtime();
die "n",&die(),"n" unless @ARGV==4;
my ($forw,$rev,$ele,$out)=@ARGV;
select STDOUT;
$|=1;
my ($driver,$dsn,$user,$psword)=("mysql","database=methylome","root","123456");
my $dbh=DBI->connect("dbi:$driver:$dsn",$user,$psword);
print "Connected!n";
my %meth_forw;my %nufor;my $meth_rev;my $nurev;my $flag=1;
open ELE,$ele or die;
while(my $line=<ELE>){
chomp $line;
print "$flag have done!!!n" if $flag00==0;$flag++;
my ($chr,$element,$nu1,$nu2,$stran)=(split(/s+/,$line))[0,2,3,4,6]; # chromosome,gene element,eadge1,eadge2,strand
&aa($element,$nu1,$nu2,$chr,$stran);
}
open OUT,"+>$out" or die;
my @sort=sort (keys %meth_forw);
foreach(@sort){
my $methsta=$meth_forw{$_}/$nufor{$_};
print OUT "$_t$methstat$meth_forw{$_}t$nufor{$_}n";
}
my $endTime=localtime();
print OUT "$startTimet$endTimen";
close OUT;
sub aa{
my $strd=pop @_; # strand
my $chrs=pop @_; #chromosome
my $elem=shift @_; #element
my $flag=0;
#positive strand
foreach my $seg(@_){
my $tem=$dbh->prepare(qq(select * from $forw where chrom='$chrs' and pos1>=$seg-100 and pos1<=$seg+100));
$tem->execute();
my ($chr,$pos1,$pos2,$depth,$meth_lev);
$tem->bind_columns($chr,$pos1,$pos2,$depth,$meth_lev);
while($tem->fetch()){
next if $depth<4;my $pkey;
if($strd eq "+"){
next if !$pos1;
$pkey=$pos1-$seg;
$nufor{"forw$elem.$flag.$pkey"}++;
$meth_forw{"forw$elem.$flag.$pkey"}+=$meth_lev;
}else{
next if !$pos1;
$pkey=$seg-$pos1;
my $flag_tem=0;
$flag_tem=1 if $flag==0;
$nufor{"forw$elem.$flag_tem.$pkey"}++;
$meth_forw{"forw$elem.$flag_tem.$pkey"}+=$meth_lev;
}
}
++$flag; # used to remind which eadge
}
#reverse stands
foreach my $seg(@_){
my $tem=$dbh->prepare(qq(select * from $rev where chrom='$chrs' and pos1>=$seg-100 and pos1<=$seg+100));
$tem->execute();
my ($chr,$pos1,$pos2,$depth,$meth_lev);
$tem->bind_columns($chr,$pos1,$pos2,$depth,$meth_lev);
while($tem->fetch()){
next if $depth<4;my $pkey;
if($strd eq "+"){
next if !$pos1;
$pkey=$pos1-$seg;
$nufor{"rev$elem.$flag.$pkey"}++;
$meth_forw{"rev$elem.$flag.$pkey"}+=$meth_lev;
}else{
next if !$pos1;
$pkey=$seg-$pos1;
my $flag_tem=0;
$flag_tem=1 if $flag==0;
$nufor{"rev$elem.$flag_tem.$pkey"}++;
$meth_forw{"rev$elem.$flag_tem.$pkey"}+=$meth_lev;
}
}
++$flag; # used to remind which eadge
}
}
$dbh->disconnect();
sub die{
my $die=<<DIE;
perl *.pl <Forward_table> <Reverse_table> <Element> <OUT>
The data has been loaded!
DIE
return $die;
}