|
import requests
from bs4 import BeautifulSoup

# Page to scrape: the Douban movie chart.
url = 'https://movie.douban.com/chart'
# Browser-like User-Agent sent with the request.
# NOTE(review): presumably needed because the site rejects the default
# python-requests User-Agent -- confirm against the live site.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}

# Fetch the page. The timeout keeps the script from hanging forever on a
# stalled connection, and raise_for_status() stops us from parsing an HTTP
# error page as if it were the chart.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
f = response.text

soup = BeautifulSoup(f, "lxml")

# Each chart entry is a <table width="100%"> inside <div class="indent">.
div1 = soup.find("div", {"class": "indent"})
if div1 is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the next line.
    raise SystemExit('no <div class="indent"> found in the response from ' + url)
table = div1.find_all("table", {"width": "100%"})

for t in table:
    # Look the info container up once per entry instead of once per field.
    info = t.find("div", {"class": "pl2"})
    if info is None:
        # Not an entry table (no info container) -- nothing to print.
        continue
    # Fields, in order: link text of the first <a> (stripped), its href,
    # the text of the first <p>, and the text of the <span class="pl">
    # inside the first nested <div>.
    # NOTE(review): the original listing was cut off after the fourth field
    # with the print() call still open; any further fields are unknown, so
    # the call is simply closed here.
    print(info.a.text.strip(), "\n",
          info.a['href'], "\n",
          info.p.text, "\n",
          info.div.find("span", {"class": "pl"}).text, "\n")
复制代码 |
|