Could someone please help me figure out why this multithreaded crawler hangs? The screenshot shows the last URL it crawled; it just gets stuck there and never moves on.
import time
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

def get_data(url):
    print(url)
    # Parse the first table on the page
    return pd.read_html(url, encoding='gbk', header=0)[0]

if __name__ == '__main__':
    t1 = time.time()
    urls = ['url1', 'url2', 'url3']
    data = []
    executor = ThreadPoolExecutor(max_workers=16)
    thread_list = [executor.submit(get_data, x) for x in urls]
    for future in thread_list:
        try:
            print("future:", future)
            data.append(future.result())
        except Exception:
            pass
    t2 = time.time()
    print(t2 - t1)
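
One variant I'm considering, in case the hang comes from a request that never times out (pd.read_html given a URL has no timeout, so a dead connection can block a worker thread forever): fetch the page myself with an explicit timeout and hand the HTML text to pd.read_html. This is just a sketch; requests and the 10-second timeout are my own assumptions, not part of the original script.

import requests
import pandas as pd
from io import StringIO

def get_data(url):
    # Assumption: a hard timeout makes a stuck connection raise an exception
    # instead of blocking the worker thread indefinitely.
    resp = requests.get(url, timeout=10)
    resp.encoding = 'gbk'
    # Parse the first table from the downloaded HTML
    return pd.read_html(StringIO(resp.text), header=0)[0]

With this, a stuck URL should raise requests.exceptions.Timeout, which the try/except in the main loop would catch instead of the whole program hanging. Not sure whether that is the actual cause here, though.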