# Scrape the first 10 pages of reviews to see whether buyers of the Kindle Paperwhite 4 more often chose the 32G model or the 8G model.
import requests
import json
import pymysql
import time
# Connection settings for the local MySQL database that stores the reviews.
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment or a config file before sharing this script.
DB_CONFIG = {
    'host': 'localhost',
    'user': 'root',
    'passwd': '123456',
    'db': 'mydb',
    'port': 3306,
    'charset': 'utf8',
}

# Headers sent with every request to the comment endpoint.
headers = {
    'Referer': 'https://item.jd.com/100000667370.html',
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Mobile Safari/537.36',
}

# Paged comment endpoint; `{}` is filled with the zero-based page number.
COMMENTS_URL = 'https://sclub.jd.com/comment/productPageComments.action?productId=100000667370&score=0&sortType=5&page={}&pageSize=10'
PAGE_COUNT = 10
REQUEST_TIMEOUT_SECONDS = 10
PAGE_DELAY_SECONDS = 4

# Keys read from each comment object, in the column order used by INSERT_SQL.
COMMENT_FIELDS = (
    'content',
    'creationTime',
    'nickname',
    'productColor',
    'productSize',
    'score',
)

INSERT_SQL = "insert into kindle_paperwhite4 (content,creationTime,nickname,productColor,productSize,score) values(%s,%s,%s,%s,%s,%s)"
select_sql = "SELECT productColor,productSize,count(*) from kindle_paperwhite4 group by productColor,productSize"


def build_page_urls(page_count):
    """Return the comment-page URLs for pages ``0 .. page_count - 1``."""
    return [COMMENTS_URL.format(page) for page in range(page_count)]


def comment_to_row(comment):
    """Return the values of COMMENT_FIELDS from one comment dict as a tuple.

    Raises:
        KeyError: if the comment lacks any of the expected keys.
    """
    return tuple(comment[field] for field in COMMENT_FIELDS)


def fetch_comments(url):
    """Download one comment page and return its ``comments`` list.

    Raises:
        requests.RequestException: on timeout, connection failure or a
            non-2xx HTTP status.
        ValueError: if the response body is not valid JSON.
        KeyError: if the decoded JSON has no ``comments`` key.
    """
    # Without an explicit timeout a stalled server would block forever.
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return json.loads(response.text)['comments']


urls = build_page_urls(PAGE_COUNT)


def main():
    """Store every review from ``urls`` in MySQL, then print the grouped counts.

    Returns:
        The rows of ``select_sql``: one (productColor, productSize, count)
        row per distinct color/size combination in the table.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            for url in urls:
                rows = [comment_to_row(comment) for comment in fetch_comments(url)]
                if rows:
                    # One bulk insert and one commit per page instead of per row.
                    cursor.executemany(INSERT_SQL, rows)
                    conn.commit()
                # Pause between pages to avoid hammering the endpoint.
                time.sleep(PAGE_DELAY_SECONDS)
            cursor.execute(select_sql)
            result = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a request or insert fails.
        conn.close()
    print(result)
    return result


if __name__ == '__main__':
    main()