python pycurl模块二个例子

发布时间:2019-09-06 编辑:脚本学堂
python pycurl模块的二个例子,python pycurl模块入门demo,curl常用方法,python pycurl模块读取rss数据保存到数据库。

1、python pycurl模块入门demo
 

复制代码 代码示例:
#!/usr/bin/env python
#
# Minimal pycurl demo: fetch one URL into an in-memory buffer and print it.
import pycurl
import StringIO

url = 'www.plcxue.com'
c = pycurl.Curl()
# Everything libcurl hands to the write callback is collected in this buffer.
b = StringIO.StringIO()
try:
    c.setopt(c.URL, url)
    c.setopt(c.WRITEFUNCTION, b.write)
    c.setopt(c.FOLLOWLOCATION, 1)
    # With HEADER enabled the response headers are written to the same
    # buffer as the body, so the printed text starts with the headers.
    c.setopt(c.HEADER, True)
    c.perform()
    html = b.getvalue()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(html)
finally:
    # Release the buffer and the curl handle even when the transfer fails.
    b.close()
    c.close()
#---
def test(debug_type, debug_msg):
    """Print one debug record in the form ``debug(<type>): <message>``.

    Meant to be registered as a pycurl DEBUGFUNCTION callback.

    debug_type is formatted with ``%d`` and debug_msg with ``%s``.
    """
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("debug(%d): %s" % (debug_type, debug_msg))

2、curl常用方法:
 

复制代码 代码示例:

# Snippet only: c (presumably a pycurl.Curl handle), url, params and
# set_cookie are not defined here and must already exist when this runs.

# Custom request headers: a form-encoded Content-Type, an X-Requested-With
# marker, and a Cookie header built from set_cookie[0].
c.setopt(c.HTTPHEADER, ["Content-Type: application/x-www-form-urlencoded","X-Requested-With:XMLHttpRequest","Cookie:"+set_cookie[0]])
# Send url as the Referer of the request.
c.setopt(c.REFERER, url)
# Request body to send, taken from params.
c.setopt(c.POSTFIELDS, params)
# Turn on libcurl's verbose output.
c.setopt(c.VERBOSE, 1)

# Issue the request as an HTTP POST.
c.setopt(c.POST, 1)
# Hand debug records to the test(debug_type, debug_msg) callback.
c.setopt(c.DEBUGFUNCTION, test)  

   url = "http://www.jb200.com"
  
    print "Starting downloading", url
    print
    f = open("body", "wb")
    h = open("header", "wb")
    c = pycurl.Curl()
    c.setopt(c.URL, url)
    c.setopt(c.WRITEDATA, f)
    c.setopt(c.NOPROGRESS, 0)
    c.setopt(c.PROGRESSFUNCTION, progress)
    c.setopt(c.FOLLOWLOCATION, 1)
    c.setopt(c.MAXREDIRS, 5)
    c.setopt(c.WRITEHEADER, h)
    c.setopt(c.POST, 1)
    c.setopt(c.OPT_FILETIME, 1)
    c.perform() 
  
    print "HTTP-code:", c.getinfo(c.HTTP_CODE)
    print "Total-time:", c.getinfo(c.TOTAL_TIME)
    print "Download speed: %.2f bytes/second" % c.getinfo(c.SPEED_DOWNLOAD)
    print "Document size: %d bytes" % c.getinfo(c.SIZE_DOWNLOAD)
    print "Effective URL:", c.getinfo(c.EFFECTIVE_URL)
    print "Content-type:", c.getinfo(c.CONTENT_TYPE)
    print "Namelookup-time:", c.getinfo(c.NAMELOOKUP_TIME)
    print "Redirect-time:", c.getinfo(c.REDIRECT_TIME)
    print "Redirect-count:", c.getinfo(c.REDIRECT_COUNT)
    epoch = c.getinfo(c.INFO_FILETIME)
    #print "Filetime: %d (%s)" % (epoch, time.ctime(epoch))
    #print
    print "Header is in file 'header', body is in file 'body'"
  
    c.close()
    f.close()
    h.close()

    #print pycurl.version_info()
    url=''
    c=pycurl.Curl()
    c.setopt(pycurl.URL, url);
  
    b = StringIO.StringIO()  
    c.setopt(pycurl.HTTPHEADER, ["Accept:"])

    c.setopt(pycurl.WRITEFUNCTION, b.write)

    c.setopt(pycurl.FOLLOWLOCATION, 2)
    #c.setopt(pycurl.HEADER, True)
    c.setopt(pycurl.MAXREDIRS, 5)
    #c.setopt(pycurl.USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)")
    #c.setopt(pycurl.REFERER, "")
    #c.setopt(pycurl.CONNECTTIMEOUT, 20)#链接超时  
    #c.setopt(pycurl.TIMEOUT, 20)#下载超时
    #c.setopt(pycurl.COOKIEFILE, "cookie_file_name")  
    #c.setopt(pycurl.COOKIEJAR, "cookie_file_name")
    c.perform()  
    #print ret
    html=b.getvalue()  
    print '-----------'
    print html
#--- 代理使用
def getURLContent_pycurl(url):
    """Fetch ``url`` with pycurl and return the collected response body.

    Redirects are followed, up to 5 of them. The body is gathered through a
    StringIO write callback and returned via ``getvalue()``. Any error raised
    by ``perform()`` propagates to the caller after the handle is closed.
    """
    c = pycurl.Curl()
    b = StringIO.StringIO()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.MAXREDIRS, 5)
        # Proxy (optional): uncomment and replace the sample address and
        # credentials to send the request through a proxy.
        # c.setopt(pycurl.PROXY, 'http://11.11.11.11:8080')
        # c.setopt(pycurl.PROXYUSERPWD, 'aaa:aaa')
        c.perform()
    finally:
        # Release the curl handle whether or not the transfer succeeded.
        c.close()
    return b.getvalue()
# Example call: fetch one page with getURLContent_pycurl and print the result.
url = 'http://www.yuju100.com'
content = getURLContent_pycurl(url)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(content)
 

 
3、python pycurl模块读取rss数据保存到数据库中,pycurl速度比urllib快。下面的示例只演示用pycurl抓取页面并打印状态码、内容和类型,保存到数据库的部分未给出。

代码:
 

复制代码 代码示例:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Fetch a page with pycurl and print its HTTP status code, body and
# content type.
import StringIO
import pycurl

html = StringIO.StringIO()
c = pycurl.Curl()
myurl = 'http://www.plcxue.com'
try:
    c.setopt(pycurl.URL, myurl)
    # Write callback: received data is appended to the in-memory buffer.
    c.setopt(pycurl.WRITEFUNCTION, html.write)
    c.setopt(pycurl.FOLLOWLOCATION, 1)
    # Cap the number of redirects to guard against redirect loops.
    c.setopt(pycurl.MAXREDIRS, 5)
    # Connection timeout, then the limit for the whole transfer.
    c.setopt(pycurl.CONNECTTIMEOUT, 60)
    c.setopt(pycurl.TIMEOUT, 300)
    # Present a browser-like User-Agent.
    c.setopt(pycurl.USERAGENT, "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)")
    # Run the request; this blocks until the transfer has finished.
    c.perform()
    # Print the HTTP status code (200 on success).
    print(c.getinfo(pycurl.HTTP_CODE))
    # Print the page content.
    print(html.getvalue())
    # Print the content type; "%s" formatting keeps the Python 2 output
    # of `print "Content-type:", value` and also works on Python 3.
    print("Content-type: %s" % c.getinfo(pycurl.CONTENT_TYPE))
finally:
    # Release the curl handle even when the transfer fails.
    c.close()