|
import requests
from lxml import etree

url = 'http://www.ruiwen.com/wenxue/zhuziqing/419754.html'


def getcontent(url, timeout=10):
    """Download a page and return the text nodes of its content block.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        A list of strings, one per text node that sits directly inside a
        child element of any ``<div class="content">`` under ``<body>``.

    Raises:
        requests.RequestException: If the connection fails, times out, or
            the server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as content.
    response.raise_for_status()
    # Force GBK decoding instead of relying on the encoding requests guesses.
    response.encoding = "gbk"
    html = etree.HTML(response.text)
    content = html.xpath('/html/body//div[@class="content"]/*/text()')
    return content


datalist = getcontent(url)
for line in datalist:
    # Trim ideographic spaces (U+3000) from both ends of each text node.
    print(line.strip('\u3000') + "\r")
复制代码 |
|