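# Overall plan:
#   - Find the URL of the video source inside the page's iframe.
#   - Use that URL to locate the m3u8 file.
#   - Download the m3u8 file and parse it.
#   - **KEY: download the video.
#   - Using the parsed file, download the video files listed in the m3u8
#     and merge them together.
#
# Known issue: while researching this, a major problem turned up. Because the
# amount of data that coroutines can handle differs between operating systems,
# on Windows no more than 509 coroutines can be processed at one time, and on
# Linux the number of coroutines cannot exceed 1024.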
# (The problem described above has not been solved yet.)
import asyncio
import os
import re
from urllib.parse import urljoin

import aiofiles
import aiohttp
import requests
import urllib3
def _first_playlist_entry(playlist_text):
    """Return the first non-blank, non-comment line of an m3u8 text, or None."""
    for raw_line in playlist_text.splitlines():
        entry = raw_line.strip()
        if entry and not entry.startswith("#"):
            return entry
    return None


def url_m3u8(url):
    """Fetch the two playlist levels behind a video page and save them to disk.

    The function performs three downloads:

    1. The HTML page at ``url``; the value of the first ``data-src="..."``
       attribute found in it is taken as the first-level playlist URL.
    2. That playlist, which is written to ``./video/first_m3u8.m3u8``.
    3. The first entry of that playlist, resolved against the playlist URL,
       which is written to ``./video/first_m3u81.m3u8``.

    Args:
        url: Address of the HTML page that embeds the player.

    Returns:
        None. The results are the two files written under ``./video``.

    Raises:
        ValueError: If the page contains no ``data-src`` attribute, or the
            first-level playlist contains no entry.
        requests.RequestException: If a request fails or the server answers
            with an HTTP error status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.63"
    }
    # The output directory is not guaranteed to exist on a fresh checkout.
    os.makedirs("./video", exist_ok=True)

    # verify=False was added by the original author after hitting an SSL
    # error; certificate verification stays disabled for all three requests.
    with requests.get(url, headers=headers, verify=False) as resp:
        resp.raise_for_status()
        resp.encoding = "utf-8"
        match = re.search(r'data-src="(?P<url_1>.*?)"><', resp.text, re.S)
    if match is None:
        raise ValueError(f"no data-src attribute found in page {url!r}")
    playlist_url = match.group("url_1")

    # First-level playlist: keep a copy on disk and parse it in memory.
    with requests.get(playlist_url, headers=headers, verify=False) as resp:
        resp.raise_for_status()
        resp.encoding = "utf-8"
        first_level_bytes = resp.content
        first_level_text = resp.text
    with open("./video/first_m3u8.m3u8", mode="wb") as f:
        f.write(first_level_bytes)

    entry = _first_playlist_entry(first_level_text)
    if entry is None:
        raise ValueError(f"playlist {playlist_url!r} contains no entry")

    # urljoin handles root-relative, relative and absolute entries alike,
    # whatever the depth of the playlist URL.
    second_level_url = urljoin(playlist_url, entry)
    with requests.get(second_level_url, headers=headers, verify=False) as resp:
        resp.raise_for_status()
        second_level_bytes = resp.content
    with open("./video/first_m3u81.m3u8", mode="wb") as f:
        f.write(second_level_bytes)
async def downloads(url, filename_m3u8, n, semaphore):
    """Download ``url`` into ``video/<filename_m3u8>`` while holding ``semaphore``.

    Args:
        url: Address of the resource to fetch.
        filename_m3u8: Path of the output file, relative to ``video``.
        n: Sequence number, used only in the progress message.
        semaphore: Async context manager (e.g. ``asyncio.Semaphore``) held for
            the whole download to cap the number of concurrent transfers.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP error
            status; in that case no file is written.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.102 Safari/537.36 Edg/104.0.1293.63"
    }
    target = f"video/{filename_m3u8}"
    async with semaphore:
        # A session is opened per call because the signature offers no way
        # to share one between downloads.
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as resp:
                # Check the status before touching the disk so that an error
                # page is never saved in place of the real payload.
                resp.raise_for_status()
                payload = await resp.read()
        # The file name may contain sub-directories; make sure they exist.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        async with aiofiles.open(target, mode="wb") as out:
            await out.write(payload)
    print(f"下载完第{n}个")
async def main(max_concurrency=50):
    """Download every entry listed in ``video/first_m3u81.m3u8`` concurrently.

    Each non-blank, non-comment line of the playlist is scheduled exactly
    once through ``downloads``; the output file name is the part of the line
    that follows the last ``hls/``. One shared semaphore caps the number of
    simultaneous downloads.

    Args:
        max_concurrency: Maximum number of downloads allowed to run at the
            same time.
    """
    entries = []
    async with aiofiles.open("video/first_m3u81.m3u8", mode="r", encoding="utf-8") as f:
        async for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            entries.append(line)

    if not entries:
        # Nothing to download; gathering an empty task list is pointless.
        return

    # A single semaphore shared by all tasks, so the limit is global.
    semaphore = asyncio.Semaphore(max_concurrency)

    # NOTE(review): each entry is passed to downloads() unchanged as the URL.
    # If the playlist holds relative paths they would have to be resolved
    # against the playlist URL, which is not available here -- confirm.
    tasks = [
        asyncio.create_task(
            downloads(line, line.split("hls/")[-1], index, semaphore)
        )
        for index, line in enumerate(entries, start=1)
    ]

    # return_exceptions=True keeps one failed download from cancelling the
    # view of the others; failures are reported instead of silently dropped.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    failures = [result for result in results if isinstance(result, Exception)]
    if failures:
        print(f"{len(failures)} of {len(results)} downloads failed")
        for error in failures:
            print(f"  {error!r}")
if __name__ == "__main__":
    # Silence urllib3's verification warnings before any request is made.
    urllib3.disable_warnings()

    # Page that hosts the video source.
    url = "http://www.wwmulu.com/rj/renleikepa/play-1-1.html"

    # Stage 1: fetch and store the playlists (blocking).
    url_m3u8(url)

    # Stage 2: run the asynchronous downloads.
    # Alternative left by the original author:
    #   asyncio.get_event_loop().run_until_complete(main())
    asyncio.run(main())

    print("Over!!")