#-*-coding:utf-8-*-
# Written for Python 2.7.3; tested successfully on both Windows and CentOS.
# If the CentOS box only has Python 2.4.3, import the time module and swap the
# time-format conversion calls for the alternative described further below.
#by:Z-Ping
#mail:251683535@qq.com
import re
#import time  # on Python 2.4.3 only: uncomment this import
import datetime
class WebLogFormat(object):
    """Open a web-server access log and provide the regex that parses it.

    Attributes:
        WebFile: the log file, opened for reading in text mode.
        LogFormat: compiled pattern for a single log line. Named groups:
            origin, identd, auth, date, time, tz, method, path, protocol,
            status, bytes, plus the optional referrer, client and cookie.
            method/path/protocol are None when the request field is "-" or
            is not of the form "METHOD PATH PROTOCOL".
    """

    # Compiled once for the whole class; still reachable as self.LogFormat.
    LogFormat = re.compile(
        # client address, identd and authenticated user
        r'(?P<origin>\d+\.\d+\.\d+\.\d+) '
        r'(?P<identd>-|\w+) (?P<auth>-|\w+) '
        # [day/month/year:hh:mm:ss +zzzz]
        r'\[(?P<date>[^\[\]:]+):(?P<time>\d+:\d+:\d+) (?P<tz>[-+]?\d\d\d\d)\] '
        # "METHOD PATH PROTOCOL" (or "-", or any other quoted text),
        # followed by the status code and the response size
        r'"(-|((?P<method>\w+) (?P<path>[\S]+) (?P<protocol>[^"]+))|[^"]+)" '
        r'(?P<status>\d+) (?P<bytes>-|\d+)'
        # optional trailing fields: referrer, then client, then cookie
        r'( (?P<referrer>-|"[^"]*")'
        r'( (?P<client>-|"[^"]*")'
        r'( (?P<cookie>-|"[^"]*"))?)?)?\s*\Z'
    )

    def __init__(self, filename):
        """Open *filename* for reading.

        Raises:
            IOError: if the file cannot be opened.
        """
        self.WebFile = open(filename, 'r')

    def close(self):
        """Close the underlying log file."""
        self.WebFile.close()

    def __enter__(self):
        """Allow use as a context manager so the file is always closed."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the log file when the ``with`` block is left."""
        self.close()
class about_ip(WebLogFormat):
    """Queries over the access log opened by WebLogFormat."""

    def __init__(self, filename):
        """Open *filename*; see WebLogFormat.__init__."""
        WebLogFormat.__init__(self, filename)

    def _parsed_lines(self):
        """Yield (line, match) for every log line that LogFormat can parse.

        The file is rewound first so that each query sees the whole log even
        when several queries run on the same instance. Lines the pattern does
        not match are skipped instead of raising AttributeError on None.
        """
        try:
            self.WebFile.seek(0)
        except (IOError, OSError):
            # Non-seekable input (e.g. a pipe): read from the current position.
            pass
        for line in self.WebFile:
            match = self.LogFormat.search(line)
            if match is not None:
                yield line, match

    def get_ipdict(self):
        """Count hits per client IP.

        Returns:
            A list of (ip, hit_count) tuples sorted by hit_count, highest
            first.
        """
        hits = {}
        for _line, match in self._parsed_lines():
            ip = match.group('origin')
            hits[ip] = hits.get(ip, 0) + 1
        return sorted(hits.items(), key=lambda item: item[1], reverse=True)

    def get_errorip(self, status, sss):
        """Print every log line whose field *status* equals *sss*.

        Matches are printed rather than collected in a list or dict, because
        the result could run to hundreds of megabytes or more.

        Args:
            status: name of the LogFormat group to test (e.g. 'status').
            sss: the string value that group must equal.

        Raises:
            KeyError: if *status* is not a group name of LogFormat.
        """
        for line, match in self._parsed_lines():
            if match.groupdict()[status] == sss:
                print(line)

    def get_time(self, starttime, endtime):
        """Count the log entries whose timestamp lies in a time window.

        Prints the current time, then the total, then the current time again
        (a crude way of showing how long the scan took).

        Args:
            starttime: inclusive lower bound, formatted 'YYYYmmddHHMMSS'.
            endtime: inclusive upper bound, formatted 'YYYYmmddHHMMSS'.

        Returns:
            The number of entries inside the window.

        Raises:
            ValueError: if a bound or a log timestamp cannot be parsed.

        Note:
            Python 2.4.3 has no datetime.datetime.strptime. There, import
            time and replace each strptime(value, fmt) call below with
            datetime.datetime.fromtimestamp(
                time.mktime(time.strptime(value, fmt))).
        """
        print(datetime.datetime.now())
        m_format = '%Y%m%d%H%M%S'
        # Explicit %H:%M:%S rather than the locale-dependent %X: the regex
        # always captures the time as digits:digits:digits.
        time_format = '%d/%b/%Y:%H:%M:%S'
        starttime = datetime.datetime.strptime(starttime, m_format)
        endtime = datetime.datetime.strptime(endtime, m_format)
        total = 0
        for _line, match in self._parsed_lines():
            stamp = match.group('date') + ':' + match.group('time')
            ptime = datetime.datetime.strptime(stamp, time_format)
            if starttime <= ptime <= endtime:
                total += 1
        print('总共%s条记录' % total)
        print(datetime.datetime.now())
        return total
# Example run: count the entries logged on 2013-05-08 between 00:00:00 and
# 00:59:59 in the sample access log.
# NOTE(review): the path looks like it may have lost a backslash after the
# drive letter (F:\...) - confirm before running.
x = about_ip(
    r'F:access_20130508.log',
)
x.get_time(
    '20130508000000',
    '20130508005959',
)