python日志分析脚本nginx和apache日志分析脚本

发布时间:2020-07-04编辑:脚本学堂
这是一个用python实现的日志分析脚本,可用于分析nginx与apache的日志文件。日常运维管理中经常需要分析日志,因此很有必要编写一个日志分析脚本,可以参考这里的python日志分析代码。

python日志分析脚本:nginx与apache日志分析脚本

用python编写日志分析脚本很简单,主要用到re模块与datetime模块。

代码:
 

复制代码 代码示例:
#-*-coding:utf-8-*-
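# Written for Python 2.7.3; tested on both Windows and CentOS.
# If CentOS only ships Python 2.4.3, import the time module and swap the
# datetime.strptime calls for the time-based conversion described below.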
#by:Z-Ping
#mail:251683535@qq.com
import re
#import time   # uncomment this import if only Python 2.4.3 is available
import datetime
class WebLogFormat:
    """Open a web-server access log and expose a regex for parsing its lines.

    Attributes:
        WebFile: the log file, opened for reading in text mode.
        LogFormat: compiled pattern for one access-log line. Its named groups
            are ``origin`` (dotted IPv4 address), ``identd``, ``auth``,
            ``date``, ``time``, ``tz``, ``method``, ``path``, ``protocol``,
            ``status``, ``bytes`` and the optional trailing ``referrer``,
            ``client`` and ``cookie`` fields.
    """

    # The escapes (\d, \w, \S, \s, \., \[ and \]) are essential: without them
    # the pattern looks for literal letters and never matches a real log line.
    # Compiled once at class level; instances still reach it as
    # ``self.LogFormat``.
    LogFormat = re.compile(
        r'(?P<origin>\d+\.\d+\.\d+\.\d+) '
        r'(?P<identd>-|\w+) (?P<auth>-|\w+) '
        r'\[(?P<date>[^\[\]:]+):(?P<time>\d+:\d+:\d+) (?P<tz>[-+]?\d\d\d\d)\] '
        r'"(-|((?P<method>\w+) (?P<path>[\S]+) (?P<protocol>[^"]+))|[^"]+)" '
        r'(?P<status>\d+) (?P<bytes>-|\d+)'
        r'( (?P<referrer>-|"[^"]*")( (?P<client>-|"[^"]*")'
        r'( (?P<cookie>-|"[^"]*"))?)?)?\s*\Z'
    )

    def __init__(self, filename):
        """Open *filename* for reading and keep the handle in ``WebFile``.

        Raises:
            IOError/OSError: if the file cannot be opened.
        """
        self.WebFile = open(filename, 'r')

    def close(self):
        """Close the underlying log file once analysis is finished."""
        self.WebFile.close()
                                                                       
class about_ip(WebLogFormat):
    """Queries over the access log opened by ``WebLogFormat``.

    Every query rewinds the log and scans it line by line, so several
    queries can be run on the same instance. Lines that ``LogFormat`` cannot
    parse are skipped.
    """

    def __init__(self, filename):
        """Open *filename* through ``WebLogFormat.__init__``."""
        WebLogFormat.__init__(self, filename)

    def _parsed_lines(self):
        """Yield ``(line, match)`` for every line matched by ``LogFormat``."""
        # A file object can only be iterated once; rewind so that a second
        # query on the same instance does not silently see an empty log.
        try:
            self.WebFile.seek(0)
        except (IOError, OSError):
            # Non-seekable input (e.g. a pipe): scan from the current position.
            pass
        for line in self.WebFile:
            match = self.LogFormat.search(line)
            # search() returns None for malformed lines; skip them instead of
            # failing with AttributeError on match.group().
            if match is not None:
                yield line, match

    def get_ipdict(self):
        """Count hits per client IP.

        Returns:
            A list of ``(ip, hits)`` tuples sorted by hit count, highest first.
        """
        hits = {}
        for _line, match in self._parsed_lines():
            ip = match.group('origin')
            hits[ip] = hits.get(ip, 0) + 1
        # items() works on both Python 2 and 3, unlike iteritems().
        return sorted(hits.items(), key=lambda pair: pair[1], reverse=True)

    def get_errorip(self, status, sss):
        """Print every log line whose field *status* equals *sss*.

        Args:
            status: name of a ``LogFormat`` capture group, e.g. ``'status'``.
            sss: the string value that group must equal.

        Raises:
            KeyError: if *status* is not a group name of ``LogFormat``.

        Matching lines are printed rather than collected, because the log may
        be hundreds of megabytes or more.
        """
        for line, match in self._parsed_lines():
            if match.groupdict()[status] == sss:
                print(line)

    def get_time(self, starttime, endtime):
        """Count the log entries whose timestamp lies in a closed interval.

        Prints the wall-clock time before and after the scan, plus the count.

        Args:
            starttime: interval start as a ``'%Y%m%d%H%M%S'`` string.
            endtime: interval end (inclusive) in the same format.

        Returns:
            The number of matching entries.

        Raises:
            ValueError: if a bound or a log timestamp cannot be parsed.
        """
        print(datetime.datetime.now())
        m_format = '%Y%m%d%H%M%S'
        # Explicit %H:%M:%S instead of %X, whose layout depends on the locale.
        time_format = '%d/%b/%Y:%H:%M:%S'

        # On Python 2.4, which lacks datetime.strptime, replace each
        # datetime.datetime.strptime(value, fmt) call below with
        # datetime.datetime.fromtimestamp(
        #     time.mktime(time.strptime(value, fmt)))
        # and enable "import time" at the top of the file.
        start = datetime.datetime.strptime(starttime, m_format)
        end = datetime.datetime.strptime(endtime, m_format)

        total = 0
        for _line, match in self._parsed_lines():
            stamp = match.group('date') + ':' + match.group('time')
            ptime = datetime.datetime.strptime(stamp, time_format)
            if start <= ptime <= end:
                total += 1

        print('总共%s条记录' % total)
        print(datetime.datetime.now())
        return total
                                                       
# Example run: count the entries logged between 00:00:00 and 00:59:59 on
# 2013-05-08. The path needs its backslash: r'F:access_...' would be a
# drive-relative path resolved against the current directory of drive F:.
x = about_ip(r'F:\access_20130508.log')
x.get_time(starttime='20130508000000', endtime='20130508005959')

有关python re模块与datetime模块的用法,请参考如下链接: