|
我在做简单抓取糗百页面的时候遇到了问题,无论是什么样的循环都不起作用,求哪个大神帮帮我指出问题在哪里。问题出在画线的地方
session = requests.Session()
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5)AppleWebKit 537.36(KHTML,like Gecko) Chrome'}
url = 'http://www.qiushibaike.com'
req = session.get(url,headers=headers)
bs0bj = bs(req.text,'html.parser')
interLink = bs0bj.findAll('a',href=re.compile('^(/8hr/)'))
for link in interLink:
if 'href'in link.attrs:
pages=link.attrs['href']
print (pages)
pages_split = pages.split('2?')
for pages1 in pages_split:
print (pages1)
joke_file = open('c:\笑话.doc','w+')
for page in range(10):
if page>1:
print (page)
page = str(page)
page_link=(url + pages_split[0] + page + '?' + pages_split[1])
print (page_link)
else:
page_link=(url)
req_qiu = session.get(page_link, headers=headers)
bs0bj2 = bs(req_qiu.text, 'html.parser')
jokeList = bs0bj2.findAll('div', {'class': 'content'})
for joke in jokeList:
#if len(joke.get_text())>20:
print ('--'*40)
print (joke.get_text(),file=joke_file)
print(joke.get_text())
time.sleep(0)
# print( qiulink )
joke_file.close()
print ('是否要存盘:\n')
print ('保存请按Y键,退出请按N \n')
save_as=input()
if save_as=='y':
osr_path='笑话.doc'
print ('文件保存在:',os.path.abspath(osr_path))
print ('ok')
else:
file='c:\笑话.doc'
if os.path.exists(file):
os.remove(file)
print ('bye')
else:
print ('bye')
time.sleep(1)
sys.exit()
|
|