本次目標是將韓國所有商品id導出至csv
源代碼
import requests
from lxml import etree
import csv
# Request headers passed to every HTTP call; the User-Agent is a desktop
# Chrome-on-macOS browser string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/76.0.3809.100 Safari/537.36"
    ),
}

# Module-level accumulator: starts empty and is filled with scraped rows.
klook_id = []
# Maps the city slug used in the Klook URL to the label stored with each id.
_CITY_LABELS = {
    'seoul': 'shouer',
    'jeju': 'jizhoudao',
    'busan': 'fushan',
    'gangwon-do': 'jiangyuandao',
    'gyeonggi-do': 'jingjidao',
    'incheon': 'renchuan',
}


def get_id(city_id, city_name, *, max_pages=22, cards_per_page=15, timeout=10):
    """Collect product ids from a Klook city listing into ``klook_id``.

    Listing pages ``1..max_pages`` of the city are requested in order. For
    each page the ``data-id`` attribute of the first ``cards_per_page`` cards
    under ``#filter-card-content`` is read, and ``[product_id, label]`` is
    appended to the module-level ``klook_id`` list. Crawling stops early at
    the first page that is empty, unparseable, or yields no ids.

    Args:
        city_id: Numeric Klook city id; used in the URL and the query string.
        city_name: City slug used in the URL (e.g. ``'seoul'``). Slugs that
            are not in ``_CITY_LABELS`` produce no rows.
        max_pages: Highest page number to request (default 22).
        cards_per_page: Number of card positions probed per page (default 15).
        timeout: Per-request timeout in seconds, forwarded to ``requests.get``.

    Returns:
        None. Results are appended to ``klook_id``.

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests.get``
            (connection failure, timeout, ...).
    """
    label = _CITY_LABELS.get(city_name)
    if label is None:
        # Unknown cities never produced rows; skip the pointless requests.
        return

    city_url = 'https://www.klook.com/zh-CN/city/' + str(city_id) + '-' + city_name

    # Each page is fetched exactly once. The previous outer ``while`` loop
    # could restart the whole crawl (duplicating ids) or never terminate.
    for page in range(1, max_pages + 1):
        payload = {
            'city_id': str(city_id),
            'page': str(page),
        }
        # A timeout keeps a stalled connection from hanging the script.
        res = requests.get(
            city_url, params=payload, headers=headers, timeout=timeout
        ).text
        if not res:
            break

        html = etree.HTML(res)
        if html is None:
            # lxml found nothing parseable in the body.
            break

        found_any = False
        for position in range(1, cards_per_page + 1):
            kr_id = html.xpath(
                '//*[@id="filter-card-content"]/div[' + str(position) + ']/a/@data-id'
            )
            if kr_id:
                klook_id.append([kr_id[0], label])
                found_any = True

        if not found_any:
            # No product cards on this page: treat it as the end of the list.
            break
# Crawl each Korean city in a fixed order; get_id() fills klook_id as it goes.
for _city_id, _city_slug in (
    (13, 'seoul'),
    (18, 'jeju'),
    (46, 'busan'),
    (156, 'gangwon-do'),
    (157, 'gyeonggi-do'),
    (158, 'incheon'),
):
    get_id(_city_id, _city_slug)

# Build the CSV rows: product id, the constant country code 'kr', city label.
kr_list = [[entry[0], 'kr', entry[1]] for entry in klook_id]

title_list = ["kr_id", "Na", "city"]

# newline='' lets the csv module control line endings itself.
with open("kr_id.csv", "w", newline='') as t:
    writer = csv.writer(t)
    writer.writerow(title_list)  # header row first
    writer.writerows(kr_list)