|
抓取访问的页面 http://www.sojump.com/jq/4738641.aspx ,代码如下,结果如图……
#-*-coding=utf-8-*-
import requests,json,os,time,re
from bs4 import BeautifulSoup
def get_page():
    """Fetch the sojump survey page and save the cookie-backed response.

    The page is requested once to obtain session cookies, which are dumped
    as JSON to the file ``mycookie``.  The cookies are then loaded back from
    that file, merged into the session, and the page is requested a second
    time.  The body of that second response is written to ``my.html``.

    Side effects:
        * Rebinds the module-level name ``s`` to the new ``requests`` session.
        * Creates or overwrites ``mycookie`` and ``my.html`` in the current
          working directory.
        * Prints ``fail`` and returns early if ``mycookie`` does not exist
          after it has been written.

    Returns:
        None.
    """
    global s

    url = 'http://www.sojump.com/jq/4738641.aspx'
    headers = {
        "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) Gecko/20100101 Firefox/41.0',
        'Host': 'www.sojump.com',
    }

    s = requests.session()
    # First request: only needed so the server hands out its cookies.
    s.get(url, headers=headers)

    # json.dump produces text, so the cookie file must be opened in text
    # mode ('wb' breaks on Python 3).
    with open('mycookie', 'w') as f:
        json.dump(s.cookies.get_dict(), f)

    if not os.path.exists('mycookie'):
        print('fail')
        return

    with open('mycookie') as f:
        cookie = json.load(f)
    s.cookies.update(cookie)

    # Second request carries the stored cookies; this is the response that
    # gets saved (the original wrote the first, cookie-less response).
    newreq = s.get(url, headers=headers)

    # Response.content is raw bytes, so write it in binary mode to avoid
    # newline translation and str/bytes errors.
    with open('my.html', 'wb') as f:
        f.write(newreq.content)
# Module-level call: the page is fetched as soon as this file is run or imported.
get_page()
|
|