python统计nginx日志中访问ip地址与流量

发布时间:2019-11-29编辑:脚本学堂
如何用python分析nginx日志:用python统计nginx日志中各访问IP地址的访问次数与访问流量,并显示ip的归属地,支持分时间段统计。

python实现nginx日志统计与分析:

统计nginx日志的访问ip和流量,并根据淘宝IP地址库,计算并显示ip的归属地,支持分时间段统计。

1、文件nginx_log.py,代码:    
 

复制代码 代码示例:

#!/bin/env python
#coding:utf-8
#统计nginx的访问ip和流量
#具有时间段分析功能

import sys
import ip_location
import time
import re

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# that implicit str/unicode conversions use UTF-8. Python 3 has no reload()
# builtin and already defaults to UTF-8, so the hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')

# Running totals keyed by client IP: bytes transferred and request count.
ipflow={}
ipnum={}

# nginx access log to analyse
log_file="/data/logs/lolo.log"

# Regex and strptime format for the log timestamp, e.g. 29/Nov/2019:10:00:00.
# The pattern must be a raw string with real \d / \w classes; without the
# backslashes it only matches literal "d" and "w" characters.
re_time=r'\d{2}/\w{3}/\d{4}:\d{2}:\d{2}:\d{2}'
str_time='%d/%b/%Y:%H:%M:%S'
 
#"时间段"
class TimeParser(object):
    """Extract the timestamp from a log line and test it against a time window.

    re_time  -- regex (string or compiled pattern) matching the timestamp text.
    str_time -- time.strptime format used to parse the matched text.
    """

    def __init__(self,re_time,str_time):
        self.re_time=re.compile(re_time)
        self.str_time=str_time
        # Window bounds are the same for every line; remember their parsed
        # values so each distinct bound string is parsed only once.
        self._epoch_cache={}

    def _to_epoch(self,text):
        """Convert timestamp text in self.str_time format to local epoch seconds."""
        return time.mktime(time.strptime(text,self.str_time))

    def _bound_epoch(self,text):
        """Return the epoch seconds for a window bound, caching the result."""
        try:
            return self._epoch_cache[text]
        except KeyError:
            value=self._epoch_cache[text]=self._to_epoch(text)
            return value

    def get(self,line):
        """Return the line's timestamp as epoch seconds, or None if it has none."""
        match=self.re_time.search(line)
        if match is None:
            return None
        return self._to_epoch(match.group(0))

    def inPeriod(self,line,start=None,end=None):
        """Return True when the line's timestamp lies strictly inside the window.

        start / end are timestamp strings in self.str_time format. When
        omitted, the module-level start_time / end_time globals are used.
        Both bounds are exclusive. Lines without a timestamp are reported
        as outside the window.
        """
        stamp=self.get(line)
        if stamp is None:
            return False
        lower=self._bound_epoch(start_time if start is None else start)
        upper=self._bound_epoch(end_time if end is None else end)
        return lower<stamp<upper
#处理函数
class ParseLog(object):
    """Aggregate per-IP request counts and traffic from a log file and print them.

    Totals are accumulated into the module-level ipflow / ipnum dicts; the
    time window is decided by TimeParser.inPeriod.
    """

    def __init__(self,file_name):
        self.file_name=file_name
        self.re_time=re.compile(re_time)
        # Attribute name kept as "srt_time" (sic) for compatibility.
        self.srt_time=str_time

    @staticmethod
    def _accumulate(lines,in_period,flows,counts):
        """Add every in-window line's request and byte count to the totals.

        lines     -- iterable of log lines.
        in_period -- callable(line) -> bool selecting the lines to count.
        flows     -- dict updated in place: ip -> summed flow field.
        counts    -- dict updated in place: ip -> number of lines.

        Lines with fewer than 11 whitespace-separated fields, or whose
        flow field is not an integer, are skipped instead of aborting.
        """
        for line in lines:
            if not in_period(line):
                continue
            fields=line.split()
            # NOTE(review): field 1 is taken as the client IP and field 10 as
            # the byte count -- confirm against the nginx log_format in use.
            try:
                ip=fields[1]
                flow=int(fields[10])
            except (IndexError,ValueError):
                continue
            # Look the key up directly; comparing against lower-cased keys
            # reset the totals of any key containing uppercase characters.
            counts[ip]=counts.get(ip,0)+1
            flows[ip]=flows.get(ip,0)+flow

    def show(self):
        """Read the log file, update the totals and print one line per IP."""
        window=TimeParser(self.re_time,self.srt_time)
        # Stream the file line by line; "with" closes it even on error.
        with open(self.file_name,"r") as fd:
            self._accumulate(fd,window.inPeriod,ipflow,ipnum)
        for k in ipnum:
            name=ip_location.ip_location(k)
            # NOTE(review): the value is labelled "K" but is not divided by
            # 1024 here -- confirm the unit of the flow field.
            print("访问IP:%s 访问次数:%d 访问流量%.3fK 归属地:%s" %(k,int(ipnum[k]),ipflow[k],name))
if __name__ == "__main__":
    # Expects exactly two arguments: the start and end of the time window,
    # both written in the str_time format.
    if len(sys.argv) != 3:
        print("输入的参数错误")
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)
    # These must stay module-level names: TimeParser.inPeriod reads them.
    start_time=sys.argv[1]
    end_time=sys.argv[2]
    p=ParseLog(log_file)
    p.show()

2、文件 ip_location.py    
 

复制代码 代码示例:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# the script is used to query the location of every ip

#淘宝库ip地址查询
import urllib
import json
import sys

# Taobao IP library query endpoint; the IP address is appended to it.
url = "http://ip.taobao.com/service/getIpInfo.php?ip="


def format_location(datadict):
    """Build the location string from a decoded API response.

    Returns country + region + city, two tab characters, then the ISP when
    the response's "code" field equals 0; returns None otherwise (including
    when "code" is absent).
    """
    if datadict.get("code") != 0:
        return None
    info = datadict["data"]
    return info["country"] + info["region"] + info["city"] + "\t\t" + info["isp"]


def ip_location(ip):
    """Query the IP library for *ip* and return its location string, or None.

    Raises whatever the HTTP request or JSON decoding raises; a response
    whose "code" is not 0 yields None.
    """
    # Local import so the function works on both Python 2 and Python 3.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen
    response = urlopen(url + ip)
    try:
        data = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    return format_location(json.loads(data.decode("utf-8")))
 
if __name__ ==  "__main__":
    # Command-line use: look up the single IP address given as the argument.
    if len(sys.argv) < 2:
        # Report the missing argument instead of failing with an IndexError.
        sys.stderr.write("usage: %s IP\n" % sys.argv[0])
        sys.exit(1)
    ip=sys.argv[1]
    name=ip_location(ip)
    print(name)