|
题主救我,我试着想用同样的代码爬“http://yz.chsi.com.cn/zsml/queryAction.do”这里的内容,但是报错。
报错内容为:Traceback (most recent call last):
File "C:/Users/2233/Desktop/广东省研究生照收大学(医学).py", line 36, in <module>
main()
File "C:/Users/2233/Desktop/广东省研究生照收大学(医学).py", line 34, in main
FillList(unifo , html)
File "C:/Users/2233/Desktop/广东省研究生照收大学(医学).py", line 19, in FillList
Slist.append([tds[0].string , tds[1].string , tds[2].string , tds[3].string , tds[4].string , tds[5].string ])
IndexError: list index out of range
下面是我的代码:
import requests
from bs4 import BeautifulSoup
import bs4
def GetHtmlTxt(url):
try:
r = requests.get(url , timeout = 30 )
r.raise_for_status()
r.recoding = r.apparent_encoding
return r.text
except:
return ""
def FillList(Slist , html):
soup = BeautifulSoup( html , "html.parser")
for tr in soup.find("tbody").descendants:
if isinstance(tr ,bs4.element.Tag ):
tds = tr("td")
Slist.append([tds[0].string , tds[1].string , tds[2].string , tds[3].string , tds[4].string , tds[5].string ])
def OutPutList(Slist , num):
print("{:^10}\t{:^6}\t{:^10}\t{:3^}\t{:3^}\t{:3^}".format("招生大学","所在地址","学校特性","研究生院","计划","Doctor"))
for i in range (num):
u = Slist
print("{:^10}\t{:^6}\t{:^10}\t{:3^}\t{:3^}\t{:3^}".format(u[0],u[1],u[2],u[3],u[4],u[5]))
def main():
unifo = []
url = "http://yz.chsi.com.cn/zsml/queryAction.do"
html = GetHtmlTxt(url)
FillList(unifo , html)
OutPutList(unifo , 11 )
main() |
|