python 处理数据代码实例_python正则

python 处理数据代码实例: 发布时间：2020-02-02编辑：脚本学堂

python 处理数据的实现代码，python读写文件，python流程控制，以及python正则过滤的用法，需要的朋友参考下。

这是一个用 python3.2 实现的程序，用于按照特定规则处理数据。

用到如下python知识：
1，python读文件
2，python写文件
3，python的流程控制
4，python的for循环
5，在python的集合或字符串中判断是否存在某个元素
6，python的逻辑或，逻辑与
7，python的正则过滤
8，python的字符串处理：去除首尾空白（strip）、判断是否以某个字符串开头（startswith）、按某个字符拆分成list（split）

python的打开文件的模式：

open 模式：

r     以读方式打开 (默认模式)
w     以写方式打开 (必要时清空已有内容)
a     以追加模式打开 (从 EOF 开始, 必要时创建新文件)
r+     以读写模式打开
w+     以读写模式打开 (参见 w )
a+     以读写模式打开 (参见 a )
rb     以二进制读模式打开
wb     以二进制写模式打开 (参见 w )
ab     以二进制追加模式打开 (参见 a )
rb+    以二进制读写模式打开 (参见 r+ )
wb+    以二进制读写模式打开 (参见 w+ )
ab+    以二进制读写模式打开 (参见 a+ )

处理代码：

代码示例：

def showtxt(path, outpathname, detailpath):
    """Filter a tab-separated keyword file against red/green word lists.

    Every input line is split on tabs; field 0 is the catalog id and field 4
    is the keyword (cleaned with ``clear``).  Four rules are checked in order,
    and a single line may match more than one of them:

    1. Keyword is in the red list: the line is kept (result file 1 and the
       output file) only when the catalog id starts with one of the
       restricted prefixes; otherwise it is logged as rejected.
    2. Keyword is in the green list: the line is kept (output file) only when
       the catalog id does NOT start with a restricted prefix; otherwise it
       is written to result file 2 and logged as rejected.
    3. Keyword contains the token "sexy" or "sex": kept (result file 3 and
       the output file) only under a restricted prefix; otherwise logged.
    4. Keyword contains the token "underwear" but neither "sexy" nor "sex":
       kept (result file 4 and the output file) only when the catalog id
       starts with "014032"; otherwise logged.

    Args:
        path: Input file, UTF-8, one tab-separated record per line.
        outpathname: Output file that receives kept lines (opened in append
            mode).  Rejections are appended to ``outpathname + ".log"``.
        detailpath: Text inserted into the names of the four per-rule
            result files.
    """
    # NOTE(review): these hard-coded paths contain no directory separators;
    # they look like Windows paths whose backslashes were lost.  They are
    # kept verbatim here -- confirm the intended locations before running.
    greenpath = r"C:UsersqindongliangDesktoptnstxtgreen.txt"
    redpath = r"C:UsersqindongliangDesktoptnstxtred.txt"
    resultprefix = r"C:UsersqindongliangDesktoptnstxtresult" + detailpath

    # Catalog-id prefixes shared by rules 1, 2 and 3.
    restricted = ("018022", "018035", "014023003")

    redset = listtxt(redpath)
    greenset = listtxt(greenpath)
    print("红色词数量: ", len(redset))
    print("绿色词数量: ", len(greenset))

    # Start at zero so the total written to the log equals the number of
    # rejected entries (it previously started at 1 and over-counted by one).
    delcount = 0

    # ``with`` guarantees every handle is closed even if processing raises.
    with open(resultprefix + "1.txt", encoding="UTF-8", mode="a+") as f1, \
            open(resultprefix + "2.txt", encoding="UTF-8", mode="a+") as f2, \
            open(resultprefix + "3.txt", encoding="UTF-8", mode="a+") as f3, \
            open(resultprefix + "4.txt", encoding="UTF-8", mode="a+") as f4, \
            open(path, encoding="UTF-8", mode="r") as f, \
            open(outpathname, encoding="UTF-8", mode="a+") as fnew, \
            open(outpathname + ".log", encoding="UTF-8", mode="a+") as flog:
        for line in f:
            line = line.strip()
            # Records are tab-separated (the separator was previously the
            # literal letter "t", which split inside ordinary words).
            fields = line.split("\t")
            if len(fields) < 5:
                # Blank or short lines carry no keyword field and therefore
                # cannot match any rule; skip them instead of raising.
                continue

            catalogid = fields[0]
            keyword = clear(fields[4].strip())
            isrestricted = catalogid.startswith(restricted)

            # Rule 1: red-list keywords are only allowed in restricted catalogs.
            if keyword in redset:
                if isrestricted:
                    f1.write(line + "\n")
                    fnew.write(line + "\n")
                else:
                    flog.write(line + "   不符合条件1 " + "\n")
                    delcount += 1

            # Rule 2: green-list keywords are only allowed outside them.
            if keyword in greenset:
                if not isrestricted:
                    fnew.write(line + "\n")
                else:
                    f2.write(line + "\n")
                    flog.write(line + "   不符合条件2" + "\n")
                    delcount += 1

            flist = formatStrList(keyword)
            hassexword = "sexy" in flist or "sex" in flist

            # Rule 3: "sexy"/"sex" tokens are only allowed in restricted catalogs.
            if hassexword:
                if isrestricted:
                    f3.write(line + "\n")
                    fnew.write(line + "\n")
                else:
                    flog.write(line + "  不符合条件3" + "\n")
                    delcount += 1

            # Rule 4: plain "underwear" keywords belong to catalog 014032.
            if "underwear" in flist and not hassexword:
                if catalogid.startswith("014032"):
                    f4.write(line + "\n")
                    fnew.write(line + "\n")
                else:
                    flog.write(line + "  不符合条件4" + "\n")
                    delcount += 1

        flog.write("删除总数目： " + str(delcount))

import re  

def clear(str):
    """Return *str* with double quotes, single quotes and plus signs removed.

    Args:
        str: The text to clean (the parameter name is kept for
            compatibility with existing callers).

    Returns:
        A copy of the text without any ``"``, ``'`` or ``+`` characters.
    """
    # The pattern used to be written as "[""''+]".  Python reads that as two
    # adjacent literals ("[" and "''+]"), giving the class [''+], so double
    # quotes were never stripped.  Spell the intended class out explicitly.
    return re.sub("[\"'+]", "", str)

def formatStrList(keyword):
    """Split *keyword* on single spaces and strip each resulting token.

    Args:
        keyword: The keyword text to tokenize.

    Returns:
        A list of tokens with surrounding whitespace removed.  Consecutive
        spaces still yield empty-string tokens, as ``str.split(" ")`` does.
    """
    # The previous loop called item.strip() and threw the result away, so
    # tokens were returned unstripped; build the stripped list instead.
    return [token.strip() for token in keyword.split(" ")]

def listtxt(path):
    """Read a UTF-8 text file and return its lines as a set.

    Args:
        path: Path of the file to read.

    Returns:
        A set holding every line of the file with leading and trailing
        whitespace stripped.  Blank lines contribute an empty string.
    """
    # ``with`` closes the file even if reading or decoding raises.
    with open(path, encoding="UTF-8") as f:
        return {line.strip() for line in f}

# Input file, output file and result-file tag for each data set.
# NOTE(review): the paths below contain no directory separators; they look
# like Windows paths whose backslashes were lost -- confirm before running.
path1 = r"C:UsersqindongliangDesktoptnstxthighfrequency.txt"
pathout1 = r"C:UsersqindongliangDesktoptnstxtdetaila_highfrequency.txt"
detail1path = "highfrequency"

path2 = r"C:UsersqindongliangDesktoptnstxthighfrequency_d1.txt"
pathout2 = r"C:UsersqindongliangDesktoptnstxtdetailb_highfrequency_d1.txt"
detail2path = "highfrequency_d1"

# Only the second data set is processed; the first one is disabled:
# showtxt(path1, pathout1, detail1path)
showtxt(path=path2, outpathname=pathout2, detailpath=detail2path)

上一篇：python字符串替换实例代码
下一篇：python检查友情链接可用状态

与 python 处理数据代码实例有关的文章

本文标题：python 处理数据代码实例
本页链接：http://www.jb200.com/article/28381.html

浏览排行

栏目分类

热点文章

python 处理数据代码实例