|
课题组师兄给我了一个代码,是他用来下载其他数据的。现在我要在NASA上下载一些气象数据,想知道怎么修改。跪求大佬教教我,5555555555555555555
代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import re
import datetime
import os, requests, time, threading
from queue import Queue
import Download_process
# Build the list of MODIS 8-day composite date folders ("YYYY.MM.DD")
# between Start and End: step 8 days at a time, keep only August dates,
# and once a date falls outside August jump ahead to Aug 5 of the next year.
Start = datetime.date(2016, 8, 4)
End = datetime.date(2016, 9, 1)
delta = datetime.timedelta(days=8)

new = []  # folder-name strings consumed by the download loop below
d = Start
while d <= End:
    if d.month == 8:
        # strftime zero-pads month and day, matching the server's folder names
        new.append(d.strftime("%Y.%m.%d"))
        d += delta
    else:
        d = datetime.date(d.year + 1, 8, 5)
###################### Download every .hdf file (single-day batches) ######
# Request headers: an Earthdata session cookie plus a desktop-browser
# User-Agent so the USGS data server accepts the requests.
Cookieinfo = "DATA=Xv70HKcyMEwO9mb-7HnKRwAAAS8"
User = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) "
    "Gecko/20100101 Firefox/77.0"
)
headers = {
    "User-Agent": User,
    "Cookie": Cookieinfo,
}
# For each date folder: fetch the directory listing, keep only the HDF
# granules matching tiles h21-h29 / v03-v07, and download each file.
# NOTE(review): needs network access, a valid Earthdata cookie, and the
# project-local Download_process module — not runnable in isolation.
# Compile the tile filter once instead of re-matching an uncompiled
# pattern for every link on every page.
TILE_PATTERN = re.compile(r"M[A-Z0-9.]{1,17}h2[1-9]v0[3-7][0-9.]{2,22}hdf$")
OUT_DIR = "J:/MODIS/miss/"

for Num in new:
    rawurl = "https://e4ftl01.cr.usgs.gov/MOLT/MOD15A2H.006/" + Num + "/"
    resp = requests.get(rawurl, headers=headers).text
    soup = BeautifulSoup(resp, 'lxml')
    # Collect hrefs; <a> tags without an href return None from .get(),
    # which would crash the regex match, so they are skipped here.
    hrefs = [a.get('href') for a in soup.find_all('a') if a.get('href')]
    # De-duplicate while preserving first-seen order: dict.fromkeys is
    # O(n), replacing the O(n^2) set()-then-sort-by-index idiom.
    link_all = list(dict.fromkeys(hrefs))
    # Keep only the wanted granules and turn them into absolute URLs.
    crawler_url = [rawurl + item for item in link_all if TILE_PATTERN.search(item)]
    for url in crawler_url:
        filename = url.split('/')[-1]
        start = time.perf_counter()
        Download_process.single_thread_download(url, OUT_DIR + filename, headers)
        end = time.perf_counter()
        print('[Message] Running time: %s Seconds' % (end - start))
        print("The file " + filename + " has been downloaded!!")
|
|