import csv
from urllib.parse import quote

from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Scrape Tmall search results for KEYWORD with headless Chrome and store one
# CSV row per product card found on the first MAX_PAGE result pages.

MAX_PAGE = 2
KEYWORD = '大J小D'
OUTPUT_PATH = 'd:/tianmao/result.csv'
PRODUCT_SELECTOR = 'div#J_ItemList.view div.product'
# 'isbn' and 'publisher' are declared in the header but nothing in this script
# scrapes them; data rows leave those two cells empty.
CSV_HEADER = ('dataid', 'site', 'price', 'deal', 'title', 'shop', 'review',
              'isbn', 'publisher')


def _load_page(page):
    """Open the search results, jump to *page*, and return the page HTML.

    Uses the module-level ``browser`` and ``wait`` objects. Raises
    TimeoutException when one of the waits does not succeed in time.
    """
    url = ('https://list.tmall.com/search_product.htm?q=' + quote(KEYWORD)
           + '&sort=d&style=g&from=sn_1_cat-qp&cat=50021913#J_Filter')
    browser.get(url)
    if page > 1:
        # Later pages are reached through the "skip to page" form.
        skip_input = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, '.ui-page-skip input.ui-page-skipTo')))
        submit = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, 'b.ui-page-skip > form button.ui-btn-s')))
        skip_input.clear()
        skip_input.send_keys(str(page))
        submit.click()
    # Confirm the pager highlights the requested page and that product cards
    # are present before taking the HTML snapshot.
    wait.until(EC.text_to_be_present_in_element(
        (By.CSS_SELECTOR, 'b.ui-page-num > b.ui-page-cur'), str(page)))
    wait.until(EC.presence_of_element_located(
        (By.CSS_SELECTOR, PRODUCT_SELECTOR)))
    return browser.page_source


def _parse_products(html):
    """Return a list of (dataid, site, price, deal, title, shop, review)
    tuples, one per product card matched by PRODUCT_SELECTOR in *html*.
    """
    rows = []
    for item in pq(html)(PRODUCT_SELECTOR).items():
        rows.append((
            # data-id lives on the product card itself; looking it up with
            # find() would search the card's descendants and yield None.
            item.attr('data-id'),
            item.find('div.product-iWrap div.productImg-wrap a').attr('href'),
            item.find('div.product-iWrap p.productPrice > em').attr('title'),
            item.find('div.product-iWrap p.productStatus :first-child > em').text(),
            item.find('div.product-iWrap p.productTitle a').attr('title'),
            item.find('div.product-iWrap div.productShop a').text(),
            item.find('div.product-iWrap p.productStatus :nth-child(2) a').text(),
        ))
    return rows


chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('log-level=3')
# `options=` is the supported keyword; `chrome_options=` is deprecated.
browser = webdriver.Chrome(options=chrome_options)
wait = WebDriverWait(browser, 10)

try:
    # Open the output once, with a single encoding for header and rows, so
    # the header cannot be overwritten or reordered by a second file handle.
    with open(OUTPUT_PATH, 'w', newline='', encoding='gb18030') as output:
        writer = csv.writer(output)
        writer.writerow(CSV_HEADER)
        for i in range(1, MAX_PAGE + 1):
            print('正在爬取第', i, '页')
            try:
                html = _load_page(i)
            except TimeoutException:
                # Best effort: report the page that failed and keep going.
                print('第', i, '页加载超时，已跳过')
                continue
            for row in _parse_products(html):
                print(*row)
                # Pad with empty cells so every row matches the header width.
                writer.writerow(row + ('',) * (len(CSV_HEADER) - len(row)))
finally:
    # quit() ends the whole WebDriver session (and the driver process),
    # even when scraping or file output fails part-way through.
    browser.quit()