|
import asyncio
import aiohttp
import requests
from lxml import etree
import aiofiles
# Index page of the book: main() downloads it to collect the chapter links.
title_url = 'https://www.bbiquge.net/book_97146/'
def main():
    """Fetch the book's index page and download every chapter it links to.

    The index page at ``title_url`` is fetched synchronously with
    ``requests``, decoded as GBK, and the chapter hrefs are extracted with
    an XPath query. The hrefs are then handed to ``get_url``, which runs
    inside a fresh asyncio event loop.

    Raises:
        requests.RequestException: if the index page cannot be fetched in
            time or the server answers with an HTTP error status.
    """
    # Without a timeout requests can block forever on an unresponsive server.
    resp = requests.get(title_url, timeout=10)
    # Fail loudly instead of parsing an error page as if it were the index.
    resp.raise_for_status()
    resp.encoding = 'gbk'

    tree = etree.HTML(resp.text)
    if tree is None:
        # Nothing parsable came back, so there are no chapter links to follow.
        print(f'no parsable HTML at {title_url}')
        return

    href_list = tree.xpath("/html/body/div[4]/dl/dd/a/@href")
    asyncio.run(get_url(href_list))
async def get_url(href_list):
    """Download all chapters concurrently.

    Each entry of ``href_list`` is appended to ``title_url`` to form the
    chapter URL, and one ``download`` coroutine is scheduled per chapter.

    Args:
        href_list: iterable of chapter hrefs taken from the index page.

    Returns:
        None. A chapter whose download raised is reported on stdout; the
        remaining chapters are still processed.
    """
    hrefs = list(href_list)
    if not hrefs:
        # asyncio.wait() rejects an empty collection with ValueError; an
        # index page without chapter links simply means nothing to do.
        return

    content_urls = [title_url + str(href) for href in hrefs]

    # return_exceptions=True keeps one failing chapter from aborting the
    # rest, while still handing the exception back so it can be reported
    # (asyncio.wait() would leave it unretrieved on the task).
    results = await asyncio.gather(
        *(download(content_url) for content_url in content_urls),
        return_exceptions=True,
    )
    for content_url, result in zip(content_urls, results):
        if isinstance(result, BaseException):
            print(f'failed to download {content_url}: {result!r}')
async def download(content_url):
    """Download one chapter page and append its text to ``元尊.txt``.

    The page is fetched with aiohttp, the chapter body is extracted with an
    XPath query, every text fragment is echoed to stdout, and the whole
    chapter is then written to the output file in a single append.

    Args:
        content_url: absolute URL of the chapter page.

    Note:
        The file is opened in append mode, so chapters land in the order in
        which their downloads finish, and output from an earlier run is not
        cleared automatically.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(content_url) as resp:
            # Decode explicitly as GBK, the encoding main() uses for this
            # site, instead of depending on aiohttp's charset detection.
            content_text = await resp.text(encoding='gbk', errors='replace')

    tree = etree.HTML(content_text)
    if tree is None:
        # No parsable markup was returned; there is nothing to extract.
        return

    content_list = tree.xpath('/html/body/div[3]/div[2]/div[1]/text()')
    if not content_list:
        return

    for content in content_list:
        print(content)

    # U+00A0 (what &nbsp; parses to) has no GBK encoding and would make the
    # write fail, so turn it into a plain space before encoding.
    chapter_text = '\n'.join(content_list).replace('\xa0', ' ')

    # Open once per chapter in append mode: mode='w' truncated the file on
    # every open, leaving only the last fragment that was written.
    async with aiofiles.open(
        '元尊.txt', mode='a', encoding='gbk', errors='replace'
    ) as f:
        await f.write(chapter_text + '\n')
# Start the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
|