Python pycurl模块通过SOCKS5代理抓取网页

发布时间:2020-10-29编辑:脚本学堂
python pycurl模块与urllib2模块、socket模块结合抓取网页内容,pycurl模块入门实例。

代码:
 

复制代码 代码示例:

#!/usr/bin/env python

import urllib2
import socket
import pycurl #引入python pycurl模块
import traceback

# Download `url` through an authenticated SOCKS5 proxy with pycurl, stream the
# response body into DOWNLOADED_FILE, then report the outcome based on the
# HTTP status code of the final response.

DOWNLOADED_FILE = '1.txt'
url = "http://www.jb200.com"

# This default only applies to sockets created through Python's `socket`
# module. libcurl opens its own connections, so the transfer below is bounded
# by the CONNECTTIMEOUT option set on the handle instead.
socket.setdefaulttimeout(5)

crl = pycurl.Curl()
try:
    crl.setopt(pycurl.URL, url)
    crl.setopt(pycurl.FOLLOWLOCATION, 1)  # follow HTTP redirects
    # Give up if the connection (including the proxy hop) cannot be
    # established within 5 seconds, mirroring the socket timeout above.
    crl.setopt(pycurl.CONNECTTIMEOUT, 5)

    # NOTE(review): proxy address and credentials are hard-coded in source;
    # consider loading them from configuration or the environment.
    crl.setopt(pycurl.PROXY, "211.108.62.231:8888")
    # Named constant instead of the bare value 5.
    crl.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
    crl.setopt(pycurl.PROXYUSERPWD, "dm:0422")

    # The context manager guarantees the output file is flushed and closed
    # even if the transfer or the status reporting raises.
    with open(DOWNLOADED_FILE, 'wb') as outfile:
        crl.setopt(pycurl.WRITEFUNCTION, outfile.write)
        try:
            crl.perform()
        except Exception:
            # Best effort: log the failure and fall through to the status
            # report below (the status code is 0 when no response arrived).
            traceback.print_exc()

    HCODE = crl.getinfo(crl.HTTP_CODE)
    # Single-argument parenthesised prints produce identical output on
    # Python 2 and Python 3.
    if HCODE == 200:
        print("down file succeful")
    elif HCODE == 404:
        print("file not find")
    else:
        print("unknow error %s" % HCODE)
finally:
    # Always release the libcurl handle.
    crl.close()