本节内容:
Python 解析 HTML 文件
1,解析HTML文件
#!/usr/bin/python
#
# site: WWW.jb200.com
import re
filename = "index.html"
newContent = "Hello Python World"


def fill_template(template, replacement, placeholder="test"):
    """Return *template* with every match of *placeholder* replaced.

    ``placeholder`` is interpreted as a regular expression and
    ``replacement`` as a regex replacement template, as ``re.subn`` does.

    Raises:
        ValueError: if ``placeholder`` does not occur in ``template``.
    """
    page, count = re.subn(placeholder, replacement, template)
    if count == 0:
        # String exceptions (``raise "..."``) are rejected by Python 2.6+,
        # so report the failure with a real exception class.
        raise ValueError("Error while parsing HTML template")
    return page


def main():
    """Read the template file and print it as a CGI response."""
    # ``with`` closes the file even if reading fails.
    with open(filename, "r") as filehandle:
        data = filehandle.read()
    page = fill_template(data, newContent)
    # A CGI header block must end with a blank line; print() appends the
    # final newline, so one explicit "\n" yields the required "\n\n".
    print("Content-Type: text/html\n")
    print(page)


if __name__ == "__main__":
    main()
2,解析HTML Web页面
#!/usr/bin/python
#
#site: www.jb200.com
import htmllib, urllib, formatter, sys
def parse(url, formatter):
    """Fetch *url* and feed its contents through an ``htmllib.HTMLParser``.

    Args:
        url: location handed to ``urllib.urlopen``.
        formatter: formatter object passed to ``htmllib.HTMLParser``. Inside
            this function the parameter shadows the ``formatter`` module.
    """
    f = urllib.urlopen(url)
    try:
        data = f.read()
    finally:
        # Release the handle even when the read fails.
        f.close()
    p = htmllib.HTMLParser(formatter)
    p.feed(data)
    p.close()
# Send the formatter's output to stdout through a DumbWriter, then parse
# the page with that formatter.
stdout_writer = formatter.DumbWriter(sys.stdout)
fmt = formatter.AbstractFormatter(stdout_writer)
parse("index.htm", fmt)