|
在爬取网站时,使用 requests 发起请求之后,返回了这样的信息:
<td><iframe border="0" frameborder="0" id="I1" marginheight="1" marginwidth="1" name="I1" scrolling="No" src="/iframe/foot_800.htm" target="_top" width="100%">浏览器不支持嵌入式框架,或被配置为不显示嵌入式框架。</iframe></td>
源码如下:
from bs4 import BeautifulSoup
import requests
import csv
import time
# HTTP request headers sent with every fetch.
# This must be a dict mapping header name -> value. The previous form
# ({'User-Agent:...' 'Cookie:...'}) was a *set* containing one implicitly
# concatenated string, which cannot be used as request headers.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/50.0.2661.102 Safari/537.36'
    ),
    # NOTE(review): this looks like a session cookie copied from a browser;
    # presumably it expires and has to be refreshed -- confirm.
    'Cookie': (
        'JSESSIONID=00001_i1x8XBaQj3qAMdO82TlG-:-1; '
        '_gscs_892086308=784806351ujwjq16|pv:3; '
        '_gscbrs_892086308=1; '
        '_gscu_892086308=7848063507o2fe16'
    ),
}
url = 'http://knews.shaanxi.gov.cn/IssuedContentAction.do?dispatch=vContentListBySubid&scope=tblist'


def get_url(url, data=None):
    """Fetch *url*, parse the response as HTML, print it and return it.

    Args:
        url: Address of the page to download.
        data: Currently unused; kept so existing callers keep working.

    Returns:
        The ``BeautifulSoup`` document built from the response body.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a 4xx/5xx HTTP status.
    """
    # Send the module-level headers; without them the User-Agent and Cookie
    # defined above never reach the server.
    # A timeout keeps the script from hanging forever on a stalled server.
    wb_data = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on HTTP errors instead of parsing an error page.
    wb_data.raise_for_status()
    soup = BeautifulSoup(wb_data.text, 'lxml')
    print(soup)
    return soup


# Script entry: fetch and dump the listing page.
get_url(url)
|
|