|
import urllib2
from bs4 import BeautifulSoup
f=open("szweather.txt","w")
for m in range(1,13):
for d in range(1,32):
if (m==2 and d>28):
break
elif (m in [4,6,9,11] and d>30):
break
timestamp='2013'+str(m)+str(d)
print "getting data for"+timestamp
url="http://www.wunderground.com/history/airport/ZGSZ/2013/"+str(m)+"/"+str(d)+"/DailyHistroy.html"
page=urllib2.urlopen(url)
soup=BeautifulSoup(page)
dayTemp=soup.findAll(attrs={"class":"wx-value"})[2].span.string
if len(str(m))<2:
mStamp='0'+str(m)
else:
mStamp=str(m)
if len(str(d))<2:
dStamp='0'+str(d)
else:
dStamp=str(d)
timestamp='2013'+mStamp+dStamp
f.write(timstamp+','+dayTemp+'\n')
f.close()
运行后提示:Traceback (most recent call last): File "C:\Users\dream\Desktop\get-weather-data.py", line 20, in
dayTemp=soup.findAll(attrs={"class":"wx-value"})[2].span.stringIndexError: list index out of range求解!!
|
|