# Example: a Python crawler program (Python urllib2 module, Python re module)
#!/usr/bin/env python
#
import urllib2
import re
# Fetch the site's front page.  The response is only consumed by the
# commented-out alternatives below that call response.read(); the active
# code works on the fixed sample string instead.
response = urllib2.urlopen('http://www.jb200.com/')

# Fixed sample input containing two <title>...</title> elements.
text = 'JGood is<title>sdfa</title> a handsome <title> boy, </title>he is cool, clever, and so on...'
# Copy of the sample with every 'y' removed ("boy" becomes "bo").
text2 = text.replace('y','')

# Alternative ways of extracting a title, kept for reference:
#m = re.search(r'<title>(.*)</title>',response.read())
#m = re.match(r'.*<title>(.*)</title>.*',response.read())
#m = re.match(r'.*<title>(.*)</title>.*',text2)

# (.*) is greedy, so on text2 the capture runs from the first <title> to the
# last </title>: group(1) is 'sdfa</title> a handsome <title> bo, '.
m = re.search(r'<title>(.*)</title>',text2)
# Single-argument parenthesised print behaves the same as the Python 2 print
# statement.  decode() turns the Python 2 byte string into unicode, dropping
# bytes that are not valid UTF-8.
print(m.group(1).decode('utf-8','ignore'))

# Other finditer patterns, kept for reference:
#m = re.finditer(r'<title>(.*)</title>',text)
# [^<title>] is a character class, not a negated word: it matches one
# character that is none of '<', 't', 'i', 'l', 'e', '>'.
#m = re.finditer(r'<title>([^<title>]*)</title>',text)

# Match each <title>...</title> pair whose content does not run across another
# opening <title>: every content character except the last must not be
# directly followed by '<title>', and the content is at least one character.
# finditer returns a lazy iterator of match objects; nothing is consumed here.
m = re.finditer(r'<title>((.(?!<title>))*.)</title>',text)