|
想到上证所下载股票列表的excel文件
from tokenize import Ignore
from urllib import request
import pandas as pd
#Download A-Stock stock list
#sse_stock_list_url = ‘http://query.sse.com.cn/security ... me=&stockType=1’
sse_stock_list_url = "http://query.sse.com.cn//sseQuery/commonExcelDd.do?sqlId=COMMON_SSE_CP_GPJCTPZ_GPLB_GP_L&type=inParams&CSRC_CODE=&STOCK_CODE=®_PROVINCE=&STOCK_TYPE=1&COMPANY_STATUS=2,4,5,7,8"
request_headers = {'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.37',
'Referer': 'http://www.sse.com.cn/assortment/stock/list/share/'
}
req = request.Request(sse_stock_list_url, headers=request_headers)
resp = request.urlopen(req)
result = resp.read().decode('gb2312') #please use gb2312 to decode otherwise you will not get correct data
print(result)
from io import StringIO
TESTDATA = StringIO(result)
df = pd.read_csv(TESTDATA, sep='\t')
print(df)
.result = resp.read().decode('gb2312') 这句话提示:UnicodeDecodeError: 'gb2312' codec can't decode byte 0xb1 in position 5: illegal multibyte sequence
用utf8也不行,求大师看看是什么原因,谢谢
|
|