python异步协程爬取数据时候，协程数量过多？出现报错

gdmmdfh · 发表于 2022-8-29 17:56:39

#先找到视频源 iframe 文件中URL
#在url中找到m3u8文件
#下载m3u8文件，并解析文件
#**KEY,下载视频
#通过解析文件下载M3U8中的视频文件，并整合起来
#通过查找资料，发现其中出现一个大错误，由于协程在不同操作系统处理的数据量不同，在windows系统，最大一次处理的协程数量不能大于509个，
#                                                             linux系统中，最大协程数量不能超过1024个
#未解决该问题import requests
import re
import urllib3
import asyncio
import aiohttp
import aiofilesdef url_m3u8(url):
url_yuan = "http://www.wwmulu.com/"
obj = re.compile(r'data-src="(?P<url_1>.*?)"><', re.S)
headers = {
      "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.63"
}
resp = requests.get(url,headers=headers,verify=False) #出现错误后，加上验证通过移除SSL认证
resp.encoding = "utf-8"
#通过url 找到iframe 中的url
url = obj.search(resp.text).group("url_1")
resp.close()
#得到了一个URL，m3u8
#下载并且得到真实的URL 文件，m3u8
resp = requests.get(url,headers=headers,verify=False)
resp.encoding = "utf-8"
#下载到文件里边并打开查找数据with open("./video/first_m3u8.m3u8",mode="wb") as f:
      f.write(resp.content)
      resp.close()
with open("./video/first_m3u8.m3u8",mode="r",encoding="utf-8") as f:
      for line in f:
         if line.startswith("#"):
            continue
#等到m3u8地址
line =line.strip()
url = url.rsplit("/",3)[0]+line
resp = requests.get(url, headers=headers, verify=False)
resp.encoding = "utf-8"
with open("./video/first_m3u81.m3u8",mode="wb") as f:
      f.write(resp.content)
      resp.close()
"""
with open("./video/first_m3u81.m3u8", mode="r", encoding="utf-8") as f:
      for line in f:
         if line.startswith("#"):
            continue
         print(line.strip())
"""
async def downloads(url,filename_m3u8,n,semaphore):
#请求url
headers = {
      "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.63"
}
async with semaphore:
      async with aiohttp.ClientSession() as ses:
         async with ses.get(url,headers=headers) as resp:
            async with aiofiles.open(f"video/{filename_m3u8}",mode="wb") as f1:
                  resp.encoding = "utf-8"
#print(url,filename_m3u8)
                  #下载到文档上边
await f1.write(await resp.content.read())
      #num_count += 1
print(f"下载完第{n}个")

async def main():
#通过文件找到URL
tasks = []
tasks1 = []
async with aiofiles.open("video/first_m3u81.m3u8", mode="r", encoding="utf-8") as f:
      async for line in f:
         if line.startswith("#"):
            continue
line = line.strip()
         #下载url  名字
filename_m3u8 = line.split("hls/")[-1]
         semaphore = asyncio.Semaphore(50)  # 限制并发量为500
data_async = [asyncio.create_task(downloads(line, filename_m3u8, i, semaphore)) for i in range(1000)] #data_async存放协程对象
print(data_async)
         tasks.append(data_async)
      await asyncio.wait(tasks)
      """
         if num_count<500:
            d = asyncio.create_task(downloads(line,filename_m3u8,num_count))
            tasks.append(d)
      """
#e = asyncio.create_task(downloads(line,filename_m3u8))
            #tasks1.append(e)
      #print(len(tasks),len(tasks1))
      #await asyncio.wait(data_async)
      #await asyncio.wait(tasks1)
if __name__ == '__main__':
#视频源
url = "http://www.wwmulu.com/rj/renleikepa/play-1-1.html"
urllib3.disable_warnings()  #消除验证警告
url_m3u8(url)
asyncio.run(main())
#asyncio.get_event_loop().run_until_complete(main())
print("Over！！")

		自动登录	找回密码
密码			立即注册

[代码与实例] python异步协程爬取数据时候，协程数量过多？出现报错