1.結果
2.思路
3.上源碼
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import time
from bs4 import BeautifulSoup
import pymysql
class Amazon(object):
    """Selenium-driven scraper that prints product blurbs from Amazon.cn search results.

    The scraper opens a Firefox window, loads a keyword search, scrolls each
    result page so its content is rendered, prints a truncated text blurb for
    every product container, and then follows the "next page" control until
    an error (typically a wait timeout) ends the run.
    """

    def __init__(self):
        # No per-instance state is needed yet. A configurable search URL was
        # sketched here but left disabled:
        # self.amazon_url = 'https://www.amazon.cn/mn/searchApp?keywords=<keyword>&searchType=utf8'
        pass

    # The constructor was originally misspelled ``__inig__`` and therefore
    # never ran on instantiation. The old name is kept as an alias so that
    # any explicit caller keeps working.
    __inig__ = __init__

    # ----------------------------- database -----------------------------
    def mysql(self):
        """Placeholder for MySQL setup; currently does nothing and returns None.

        The disabled sketch below shows the intended flow: connect, ask for a
        table name, and create the table if it does not exist yet.
        """
        pass
        # conn = pymysql.Connect(host='x', user='x', password='x', database='a', port=x, charset='x')
        # cursor = conn.cursor()
        # dataname = input('Enter the table name: ')
        # sql = 'CREATE TABLE IF NOT EXISTS %s(ID INT(10) NOT NULL PRIMARY KEY AUTO_INCREMENT,' \
        #       'A TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,' \
        #       'COMMODITY VARCHAR(255))ENGINE = INNODB DEFAULT CHARSET=utf8'
        # cursor.execute(sql % dataname)
        # print('Table created!')

    @staticmethod
    def _clean_name(text, limit=100):
        """Return *text* stripped, with blank-line gaps removed, cut to *limit* characters."""
        return text.strip().replace('\n\n', '')[:limit]

    def _scrape_pages(self, browser, wait):
        """Print every product blurb page by page until results or pagination run out.

        Args:
            browser: The live WebDriver instance, already showing a results page.
            wait: A ``WebDriverWait`` bound to *browser*.

        Raises:
            Exception: Whatever Selenium raises when the "next page" control
                cannot be found or clicked; the caller treats that as the end
                of the run.
        """
        count = 0
        while True:
            # Scroll down in growing steps so the page renders its content
            # before the HTML snapshot is taken.
            for down in range(0, 10000, 1000):
                browser.execute_script('window.scrollBy(0,{})'.format(down))
                time.sleep(2)
            print(count)

            soup = BeautifulSoup(browser.page_source, 'lxml')
            results = soup.find('ul', attrs={'id': 's-results-list-atf'})
            if results is None:
                # find() returns None when the list is missing; stop cleanly
                # instead of failing with an opaque AttributeError.
                print('Result list "s-results-list-atf" not found; stopping.')
                return

            # Each container holds the text of one product.
            for container in results.find_all('div', class_='s-item-container'):
                name = self._clean_name(container.get_text())
                count += 1
                print('{}\n{}'.format(count, name))
                # Persistence hook (disabled): insert `name` into the table
                # created by mysql(). Use a parameterized query for the value
                # rather than building the SQL with string formatting.

            time.sleep(5)
            # Go to the next page; wait until the control can actually be
            # clicked rather than merely being present in the DOM.
            wait.until(EC.element_to_be_clickable((By.ID, 'pagnNextString'))).click()
            time.sleep(4)

    # Change `keyword` to search for something else.
    def get_amazon_bag(self, keyword='書包'):
        """Open Firefox, search Amazon.cn for *keyword*, and print the product blurbs.

        Args:
            keyword: Search term placed into the search URL as-is. The default
                is the term from the original hard-coded URL with the stray
                "(shū)" pinyin annotation removed.

        Returns:
            None. Results are written to standard output.

        Raises:
            Exception: Errors raised while starting the browser or loading the
                first results page propagate to the caller. Errors raised
                during page-by-page scraping are printed and end the run.
        """
        browser = webdriver.Firefox()
        try:
            browser.set_window_size(900, 900)
            wait = WebDriverWait(browser, 10)
            url = 'https://www.amazon.cn/mn/searchApp?keywords={}&searchType=utf8'.format(keyword)
            browser.get(url)
            wait.until(EC.presence_of_element_located((By.ID, 'atfResults')))
            try:
                self._scrape_pages(browser, wait)
            except Exception as e:
                # Boundary of the scraping run: a timeout on the next-page
                # control is the normal way the loop ends, so report and stop.
                print(e)
        finally:
            # Always close the browser so no Firefox/geckodriver process is
            # left running after the run, whether it succeeded or failed.
            browser.quit()
# Run the scraper only when this file is executed as a script, so that
# importing the module does not open a browser as a side effect.
if __name__ == '__main__':
    a = Amazon()
    a.get_amazon_bag()