python分析apache网站日志web日志的代码

发布时间:2020-03-20编辑:脚本学堂
有关用python分析apache日志的几段代码,python脚本分析apache中的web日志,也用于分析nginx与iis日志文件等,需要的朋友参考下。

python分析apache网站日志的代码

1、分析日志的python框架awk.py
 

复制代码 代码示例:


# Custom awk.py module 

class controller:
    """Awk-style driver: feeds every line of an input to subscribed handlers.

    A handler is any object that provides ``begin()``, ``process_line(s)``,
    ``end()``, ``description()`` and ``result()``.
    """

    def __init__(self, f):
        """Store the input object ``f`` and start with no handlers.

        Only ``f.readline()`` is ever called on the input.
        """
        self.m_file = f
        self.m_handlers = []

    def subscribe(self, o):
        """Register handler ``o``; handlers are invoked in subscription order."""
        self.m_handlers.append(o)

    def run(self):
        """Call ``begin()`` on every handler, pass each line to every
        handler's ``process_line()``, then call ``end()`` on every handler.
        """
        for o in self.m_handlers:
            o.begin()

        # readline() returns "" only at end of input, which is the sentinel
        # that stops the iteration (a blank line still carries its "\n").
        for s in iter(self.m_file.readline, ""):
            for o in self.m_handlers:
                o.process_line(s)

        for o in self.m_handlers:
            o.end()

    def print_results(self):
        """Print each handler's description and result to standard output."""
        # Single-argument print(...) calls produce the same output whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        rule = "------------------------------------------------------"
        print("")
        print("Results:")
        print("")
        for o in self.m_handlers:
            print(rule)
            print(o.description())
            print(rule)
            print(o.result())

2、统计日志的点击量count_lines.py
 

复制代码 代码示例:
# Standard sys module 
import sys 
# Custom awk.py module 
import awk 
class count_lines:
    """Handler that tallies how many lines it has been given."""

    def begin(self):
        """Reset the tally to zero."""
        self.m_count = 0

    def process_line(self, s):
        """Count one more line; the text of ``s`` is not inspected."""
        self.m_count = self.m_count + 1

    def end(self):
        """Nothing to finalise for a plain counter."""
        return None

    def description(self):
        """Return the caption for this handler's result."""
        return "# of lines in the file"

    def result(self):
        """Return the number of lines counted since ``begin()``."""
        return self.m_count
 

# Feed standard input through a controller that has the line counter
# attached, then print what the counter collected.
ac = awk.controller(sys.stdin)
line_counter = count_lines()
ac.subscribe(line_counter)
ac.run()
ac.print_results()

3、使用方法是shell中执行
 

复制代码 代码示例:
# cat apachelog.log|python count_lines.py

4、统计浏览次数超过n次的访问者  visitors.py
 

复制代码 代码示例:
import re; 
import sys 
import awk
class return_visitors:
    """Handler counting client IPs that appear on more than ``n`` distinct days.

    Each line is split on whitespace; field 0 is taken as the client IP and
    characters 1..6 of field 3 as the day key.
    """

    def __init__(self, n):
        """Store the threshold ``n`` and start with an empty IP -> days map."""
        self.m_n = n
        self.m_ip_days = {}
        # Defined up front so result() is usable even before end() has run.
        self.m_count = 0

    def begin(self):
        """No per-run setup is needed."""
        pass

    def process_line(self, s):
        """Record the day key of line ``s`` under its IP.

        Lines with fewer than four whitespace-separated fields are ignored.
        """
        fields = s.split()
        try:
            ip = fields[0]
            # Skips the first character and keeps the next six.
            # NOTE(review): for an Apache timestamp such as
            # "[20/Mar/2020:10:00:00" this presumably yields "20/Mar"
            # (day and month, no year) -- confirm that is intended.
            day = fields[3][1:7]
        except IndexError:
            return

        # ``in`` / setdefault replace dict.has_key, which Python 3 removed.
        days = self.m_ip_days.setdefault(ip, [])
        if day not in days:
            days.append(day)

    def end(self):
        """Compute how many IPs were seen on more than ``n`` distinct days."""
        self.m_count = sum(
            1 for days in self.m_ip_days.values() if len(days) > self.m_n
        )

    def description(self):
        """Return the caption for this handler's result."""
        return "# of IP addresses that visited more than %s days" % self.m_n

    def result(self):
        """Return the count computed by ``end()`` (0 before ``end()`` runs)."""
        return self.m_count
# Report how many client IPs in the log read from standard input were seen
# on more than two distinct days.
ac = awk.controller(sys.stdin)
repeat_visitors = return_visitors(2)
ac.subscribe(repeat_visitors)
ac.run()
ac.print_results()
# cat apachelog.log|python visitors.py

5、按照域名统计访问量domain.py
 

复制代码 代码示例:
import re; 
import sys 
import awk
class referring_domains:
    """Handler that counts log lines per referring host name.

    Field 10 of each whitespace-split line is taken as the referrer; the host
    is whatever sits between ``//`` and the next ``/`` when it looks like a
    dotted name ending in an alphabetic top-level label.
    """

    # Raw string with the dot escaped and a proper A-Z range. The label
    # length is open-ended so TLDs longer than three letters still match.
    _DOMAIN_RE = re.compile(r"//([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/")

    def __init__(self):
        """Start with an empty domain -> hit count map."""
        self.m_domains = {}

    def begin(self):
        """No per-run setup is needed."""
        pass

    def process_line(self, line):
        """Add one hit for the referring domain found in ``line``.

        Lines with fewer than eleven fields, or whose referrer contains no
        recognisable ``//host/`` part, are ignored.
        """
        fields = line.split()
        if len(fields) <= 10:
            return
        found = self._DOMAIN_RE.search(fields[10])
        if found is None:
            return
        domain = found.group(1)
        # dict.get replaces the has_key branch, which Python 3 removed.
        self.m_domains[domain] = self.m_domains.get(domain, 0) + 1

    def end(self):
        """Nothing to finalise."""
        pass

    def description(self):
        """Return the caption for this handler's result."""
        return "Referring domains"

    def sort(self, key1, key2):
        """cmp-style comparison ordering domains by hit count, highest first.

        Returns -1, 0 or 1 in the manner of a classic comparison function.
        """
        first = self.m_domains[key1]
        second = self.m_domains[key2]
        if first > second:
            return -1
        if first == second:
            return 0
        return 1

    def result(self):
        """Return one "domain count" line per domain, busiest first,
        followed by two extra newlines.
        """
        # Imported here so the block does not depend on a file-level import.
        from functools import cmp_to_key

        # cmp_to_key keeps self.sort as the ordering hook while working on
        # Python 3, where list.sort() no longer accepts a cmp function.
        ordered = sorted(self.m_domains, key=cmp_to_key(self.sort))
        rows = ["%s %s\n" % (domain, self.m_domains[domain]) for domain in ordered]
        return "".join(rows) + "\n\n"
# Tally referring domains for the log read from standard input and print
# them, busiest first.
ac = awk.controller(sys.stdin)
referrers = referring_domains()
ac.subscribe(referrers)
ac.run()
ac.print_results()
# cat apachelog.log|python domain.py