测试代码:
复制代码 代码示例:
# Basic pycurl usage: fetch one page into an in-memory buffer.
# StringIO lets us treat a string like a file; ``from cStringIO import
# StringIO`` is a drop-in alternative that should perform somewhat better.
import StringIO
import pycurl

html = StringIO.StringIO()
c = pycurl.Curl()
c.setopt(pycurl.URL, 'http://www.baidu.com')
# Write callback: every chunk of the response body is appended to ``html``.
c.setopt(pycurl.WRITEFUNCTION, html.write)
# Follow HTTP redirects (the option value was missing in the listing).
c.setopt(pycurl.FOLLOWLOCATION, 1)
# Cap the number of redirects to guard against redirect loops/traps.
c.setopt(pycurl.MAXREDIRS, 5)
# Perform the request; blocks until the transfer has finished.
c.perform()
# Prints e.g. "200 http://www.baidu.com": the HTTP status code followed by
# the effective (post-redirect) URL.
print("%s %s" % (c.getinfo(pycurl.HTTP_CODE), c.getinfo(pycurl.EFFECTIVE_URL)))
# Release the easy handle now that all info has been read from it.
c.close()
# Uncomment to dump the HTML of the fetched page:
# print(html.getvalue())
然后,可以查看 http://pycurl.cvs.sourceforge.net/pycurl/pycurl/tests/ 中与多线程相关的一些例子。
还可以参考:http://pycurl.sourceforge.net/doc/curlmultiobject.html
代码:
复制代码 代码示例:
#!/usr/bin/env python
#coding=utf-8
import threading
import pycurl
from cStringIO import StringIO
class UrlOpen(threading.Thread):
    """Download web pages asynchronously on a background thread.

    Every queued URL gets its own ``pycurl.Curl`` easy handle attached to a
    single ``pycurl.CurlMulti``. ``run`` drives all transfers and calls the
    per-request callback once a transfer has completed successfully.
    """

    # Seconds to wait in CurlMulti.select(). The literal was missing from the
    # listing this code came from, so this is a chosen value.
    SELECT_TIMEOUT = 1.0

    def __init__(self):
        super(UrlOpen, self).__init__()
        self.opener = pycurl.CurlMulti()
        # Easy handles currently attached to ``self.opener``.
        self.handle_list = []

    def add(self, url, recall, writer=None):
        """Queue ``url`` for download.

        Args:
            url: address to fetch.
            recall: callback invoked with the finished ``Curl`` handle; the
                handle carries ``url``, ``content`` and ``http_code``.
            writer: object with a ``write`` method that receives the response
                body. A fresh ``StringIO`` is created when omitted, so that
                requests never share one buffer.
        """
        if writer is None:
            writer = StringIO()
        c = pycurl.Curl()
        # Stash request data on the handle so the callback can reach it.
        c.url = url
        c.content = writer
        c.recall = recall
        c.setopt(c.URL, url)
        c.setopt(c.WRITEFUNCTION, c.content.write)
        self.handle_list.append(c)
        self.opener.add_handle(c)

    def _remove(self, c):
        """Detach handle ``c`` from the multi object, then close it."""
        self.opener.remove_handle(c)
        c.close()
        self.handle_list.remove(c)

    def run(self):
        """Drive all transfers; loops forever and never returns."""
        num_handle = len(self.handle_list)
        while True:
            ret = self.opener.select(self.SELECT_TIMEOUT)
            if ret == -1:
                continue
            while True:
                num_handle_pre = num_handle
                ret, num_handle = self.opener.perform()
                # The number of active transfers changed: collect results.
                if num_handle != num_handle_pre:
                    result = self.opener.info_read()
                    print(result)
                    # result is (queued_count, ok_handles, failed_entries).
                    for handle in result[1]:
                        # Success: record the status, release, then notify.
                        handle.http_code = handle.getinfo(handle.HTTP_CODE)
                        self._remove(handle)
                        handle.recall(handle)
                    for failure in result[2]:
                        # Failure entries are (handle, error_no, message)
                        # tuples; only the handle can be removed. Failures
                        # should be logged by whoever extends this class.
                        self._remove(failure[0])
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break
# Lazily created background downloader shared by every urlopen() call.
_opener = None


def urlopen(*arg, **key):
    """Queue a download on the shared ``UrlOpen`` thread.

    All arguments are forwarded to ``UrlOpen.add``. The first call creates
    the downloader, queues the request and only then starts the thread;
    later calls just queue the request.
    """
    global _opener
    first_call = _opener is None
    if first_call:
        _opener = UrlOpen()
    _opener.add(*arg, **key)
    if first_call:
        _opener.start()
def show(x):
    """Print the downloaded body held in ``x.content``.

    Args:
        x: finished handle whose ``content`` attribute exposes ``getvalue()``.
    """
    # Single-argument call form prints identically on Python 2 and 3.
    print(x.content.getvalue())
if __name__ == "__main__":
    # Demo: fetch one page, print it via ``show``, then wait on the worker.
    demo_url = "http://www.jb200.com/"
    urlopen(demo_url, show)
    _opener.join()
例2,python pycurl模块异步打开网页的类与函数。
复制代码 代码示例:
#!/usr/bin/env python
#coding=utf-8
import threading
from cStringIO import StringIO
import pycurl
"""
Asyn open url
Author:zsp@gmail.com
-- :
"""
class UrlOpen(threading.Thread):
    """Download web pages asynchronously on a background thread.

    Requests are first placed on ``waiting`` by ``add`` and are attached to
    the shared ``pycurl.CurlMulti`` from inside ``run`` via ``_add``.
    """

    # Seconds to wait in CurlMulti.select(), and seconds to sleep when no
    # transfer is active. Both literals were missing from the listing this
    # code came from, so these are chosen values.
    SELECT_TIMEOUT = 1.0
    IDLE_SLEEP = 1

    def __init__(self,):
        super(UrlOpen, self).__init__()
        self.opener = pycurl.CurlMulti()
        # Easy handles currently attached to ``self.opener``.
        self.handle_list = []
        # Easy handles queued by add() but not yet attached.
        self.waiting = []

    def add(self, url, recall, catch=None, writer=None):
        """Queue ``url`` for download.

        Args:
            url: address to fetch; ``unicode`` values are encoded as UTF-8.
            recall: callback invoked with the finished ``Curl`` handle; the
                handle carries ``url``, ``content`` and ``http_code``.
            catch: callback invoked as ``catch(curl, error_no, desp)`` when
                the transfer fails. Defaults to a function that does nothing.
            writer: object with a ``write`` method that receives the response
                body. A fresh ``StringIO`` is created when omitted, so that
                requests never share one buffer.
        """
        if catch is None:
            def catch(curl, error_no, desp):
                # Default handler: ignore the failure.
                # print("Error:%s - %s" % (error_no, desp))
                pass
        if writer is None:
            writer = StringIO()
        c = pycurl.Curl()
        # Stash request data on the handle so the callbacks can reach it.
        c.url = url
        c.content = writer
        c.recall = recall
        c.catch = catch
        c.setopt(c.URL, url.encode('utf-8') if isinstance(url, unicode) else url)
        c.setopt(c.WRITEFUNCTION, c.content.write)
        self.waiting.append(c)

    def _add(self):
        """Attach every waiting handle to the multi object."""
        # Pop entries one at a time instead of copying and rebinding the
        # list, so a handle appended by add() in between is not dropped.
        while self.waiting:
            c = self.waiting.pop(0)
            self.handle_list.append(c)
            self.opener.add_handle(c)

    def _remove(self, c):
        """Detach handle ``c`` from the multi object, then close it."""
        self.opener.remove_handle(c)
        c.close()
        self.handle_list.remove(c)

    def run(self):
        """Drive all transfers; loops forever and never returns."""
        import time
        num_handle = 0
        while True:
            if self.handle_list:
                ret = self.opener.select(self.SELECT_TIMEOUT)
                if ret >= 0:
                    while True:
                        num_handle_pre = num_handle
                        ret, num_handle = self.opener.perform()
                        # Active transfer count changed: collect results.
                        if num_handle != num_handle_pre:
                            result = self.opener.info_read()
                            # result is (queued_count, ok_handles, failures).
                            for handle in result[1]:
                                # Success: record status, release, notify.
                                handle.http_code = handle.getinfo(handle.HTTP_CODE)
                                self._remove(handle)
                                handle.recall(handle)
                            for failure in result[2]:
                                # Failure entries look like
                                # (<pycurl.Curl object>, error_no,
                                #  'Could not resolve host: ...').
                                # Report through the failure callback first.
                                failure[0].catch(*failure)
                                self._remove(failure[0])
                        if ret != pycurl.E_CALL_MULTI_PERFORM:
                            break
            else:
                # Nothing in flight: idle instead of spinning.
                time.sleep(self.IDLE_SLEEP)
            # Pick up requests queued since the previous iteration.
            self._add()
# Lazily created background downloader shared by every urlopen() call.
_opener = None


def urlopen(*arg, **key):
    """Queue a download on the shared ``UrlOpen`` thread.

    The first call creates and starts the downloader thread. Every call
    forwards its arguments to ``UrlOpen.add``.
    """
    global _opener
    worker = _opener
    if worker is None:
        worker = UrlOpen()
        _opener = worker
        worker.start()
    worker.add(*arg, **key)
if __name__ == "__main__":
    def show(x):
        """Print the downloaded body followed by a dashed separator line."""
        print(x.content.getvalue())
        # The repeat count was missing from the listing; 10 is a chosen value.
        print('--' * 10)

    # Demo: queue several pages; each is printed as soon as it completes.
    for demo_url in (
        "http://www.baidu.com/",
        "http://www.google.com/",
        "http://www.sougou.com/",
        "http://www.yodao.com/",
        "http://www.yahoo.com/",
        "http://www.msn.com/",
    ):
        urlopen(demo_url, show)
    _opener.join()