Python基础教程之统计目录中所有文件数与行数

发布时间:2019-10-20编辑:脚本学堂
本文介绍了python统计目录中所有文件数与文件行数的方法,一个不错的Python基础教程实例代码,有需要的朋友参考下。

python代码包括四个函数:
统计单个文件行数的函数line_count()
统计一个目录(含子目录)下文件数的函数file_count()
统计一个目录(含子目录)下在指定时间之后修改过的文件数的函数file_changed_count()
统计一个目录(含子目录)下所有文件行数的函数count_all_lines()
后三个函数添加了一个简单的过滤机制,可按扩展名只统计特定类型的文件。

例子,python统计目录中所有文件数与行数的代码。
 

复制代码 代码示例:
# -*- coding: utf-8 -*-
import time
import os
from datetime import datetime
from datetime import timedelta
 
def line_count(filename):
    '''Count the non-blank lines of a file.

    A line is blank when it holds nothing except its line terminator
    (a newline, optionally preceded by a carriage return). Lines made up
    of spaces or tabs still count.

    The file is read in binary mode so that content which is not valid
    text in the platform's default encoding cannot raise a decoding error.

    Args:
        filename: Path of the file to read.

    Returns:
        The number of non-blank lines; 0 for an empty file.
    '''
    count = 0
    # "with" closes the handle even when reading fails part-way through.
    with open(filename, 'rb') as handle:
        for line in handle:
            # Drop only the terminator; anything left means real content.
            if line.rstrip(b'\r\n'):
                count += 1
    return count
 
def file_count(dirname, filter_types=None):
    '''Count the regular files in a directory tree.

    Sub-directories are visited recursively. Entries that are neither a
    directory nor a regular file are ignored.

    Args:
        dirname: Directory to scan.
        filter_types: Optional collection of extensions, each including the
            leading dot (for example ['.py', '.txt']). When given and
            non-empty, only files whose extension is in the collection are
            counted; the comparison is case-sensitive. None or any empty
            collection disables the filter.

    Returns:
        The number of files found.
    '''
    count = 0
    for item in os.listdir(dirname):
        abs_item = os.path.join(dirname, item)
        if os.path.isdir(abs_item):
            # Descend, applying the same filter to the sub-directory.
            count += file_count(abs_item, filter_types)
        elif os.path.isfile(abs_item):
            # Truthiness treats None, [] and () alike as "no filter".
            if not filter_types or os.path.splitext(abs_item)[1] in filter_types:
                count += 1
    return count
def file_changed_count(dirname, base_time, filter_types=None):
    '''Count the files in a directory tree modified after a given time.

    Sub-directories are visited recursively with the same base_time and
    filter, so the modification-time test applies at every depth.

    Args:
        dirname: Directory to scan.
        base_time: Naive datetime in local time. A file is counted only if
            its modification time is strictly later than this.
        filter_types: Optional collection of extensions, each including the
            leading dot (for example ['.py']). When given and non-empty,
            only files whose extension is in the collection are counted.
            None or any empty collection disables the filter.

    Returns:
        The number of matching files.
    '''
    count = 0
    for item in os.listdir(dirname):
        abs_item = os.path.join(dirname, item)
        if os.path.isdir(abs_item):
            # Recurse into this same function so base_time is honoured in
            # sub-directories too.
            count += file_changed_count(abs_item, base_time, filter_types)
        elif os.path.isfile(abs_item):
            modified = datetime.fromtimestamp(os.path.getmtime(abs_item))
            if modified <= base_time:
                continue
            # Truthiness treats None, [] and () alike as "no filter".
            if not filter_types or os.path.splitext(abs_item)[1] in filter_types:
                count += 1
    return count
 
def count_all_lines(dirname, filter_types=None):
    '''Total the line counts of the files in a directory tree.

    Sub-directories are visited recursively. The per-file count is
    whatever line_count() returns for that file.

    Args:
        dirname: Directory to scan.
        filter_types: Optional collection of extensions, each including the
            leading dot (for example ['.py']). When given and non-empty,
            only files whose extension is in the collection are read.
            None or any empty collection disables the filter.

    Returns:
        The sum of line_count() over every selected file.
    '''
    count = 0
    for item in os.listdir(dirname):
        abs_item = os.path.join(dirname, item)
        if os.path.isdir(abs_item):
            count += count_all_lines(abs_item, filter_types)
        elif os.path.isfile(abs_item):
            # Truthiness treats None, [] and () alike as "no filter".
            if not filter_types or os.path.splitext(abs_item)[1] in filter_types:
                count += line_count(abs_item)
    return count
 
 
if __name__ == '__main__':
    # Demo: count the files of one type under a source tree and report how
    # long the scan took.

    # time.clock() was removed in Python 3.8. Use perf_counter() where it
    # exists and fall back to time.time() on interpreters that lack it.
    timer = getattr(time, 'perf_counter', time.time)
    started = timer()

    srcdir = r"C:\work"
    # Extensions to count, e.g. ['.cs'] or ['.pas']; an empty list counts
    # every file.
    filter_types = ['.pas']

    print('Counting ...')
    # Alternatives: count_all_lines(srcdir, filter_types) totals lines
    # instead of files; file_changed_count(srcdir,
    # datetime.now() - timedelta(1), filter_types) counts only the files
    # modified during the last day.
    cc = file_count(srcdir, filter_types)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(cc)
    print('Time is : %.2f seconds' % (timer() - started))