想要爬取大众点评网上的火锅商家的信息,却被告诉有错误,下面的是代码:
import urllib2
from bs4 import BeautifulSoup import xlwt
url='http://www.dianping.com/chengdu/ch10/g110px' wbk=xlwt.Workbook() sheet=wbk.add_sheet('sheet 1',cell_overwrite_ok=True) sheet.write(0,0,'title') sheet.write(0,1,'comment') sheet.write(0,2,'tag-address') sheet.write(0,3,'recommend') webpage=urllib2.urlopen(url) soup=BeautifulSoup(webpage,'lxml',from_encoding='utf-8') m=1 i=1 while i<=5: a=list(url) a[-1]=i b=''.join('%s' %id for id in a)#这一步相当重要 webpage=urllib2.urlopen(b) html_doc=webpage.read() soup=BeautifulSoup(html_doc,'html.parser',from_encoding='utf-8') links=soup.find_all('div',class_='txt') for link1 in links: for link2 in link1.find_all('div',class_='tit'): sheet.write(m,1,link2.get_text()) for link3 in link1.find_all('div',class_='comment'): sheet.write(m,2,link3.get_text()) for link4 in link1.find_all('div',class_='tag-address'): sheet.write(m,3,link4.get_text()) for link5 in link1.find_all('div',class_='recommend'): sheet.write(m,4,link5.get_text()) m=m+1 i=i+1 wbk.save(r'C:\Users\123\Desktop/hotpot.xls') Spyder上的错误信息是: runfile('C:/Users/123/Desktop/爬虫练习/大众点评网.py', wdir='C:/Users/123/Desktop/爬虫练习')
Traceback (most recent call last): File "<ipython-input-8-456082ca6ef2>", line 1, in <module>
runfile('C:/Users/123/Desktop/爬虫练习/大众点评网.py', wdir='C:/Users/123/Desktop/爬虫练习') File "C:\Users\123\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 585, in runfile
execfile(filename, namespace) File "C:/Users/123/Desktop/爬虫练习/大众点评网.py", line 19, in <module>
webpage=urllib2.urlopen(url) File "C:\Users\123\Anaconda\lib\urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout) File "C:\Users\123\Anaconda\lib\urllib2.py", line 410, in open
response = meth(req, response) File "C:\Users\123\Anaconda\lib\urllib2.py", line 523, in http_response
'http', request, response, code, msg, hdrs) File "C:\Users\123\Anaconda\lib\urllib2.py", line 448, in error
return self._call_chain(*args) File "C:\Users\123\Anaconda\lib\urllib2.py", line 382, in _call_chain
result = func(*args) File "C:\Users\123\Anaconda\lib\urllib2.py", line 531, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) HTTPError: Forbidden 请大家看看哪有问题
|