I have used Python to scrape data from a number of websites, such as Meituan, Dianping, Yimutian, and rental listings. None of that data was used commercially; I scraped it purely out of personal interest and as practice. Here I take Yimutian as an example and scrape its data with the Scrapy framework.
Yimutian (一畝田)
Yimutian is an agricultural-products website that aggregates production-area and market price information for most farm produce in China. It was founded by people with a Baidu background, and in its early days the company hired large numbers of field staff to go into rural areas and teach farmers to publish their product information on Yimutian.
Yimutian started out as a website, but because of heavy crawler traffic, and because the web version was inconvenient for farmers working in the fields, it was replaced by an app and the website was retired; the app's anti-scraping defences are very strong. However, the production-area and market price sections are still available as web pages and carry a large amount of information, so I chose to scrape the production-area price data.
The crawler is built on the Scrapy framework. I will not explain how Scrapy itself works or walk through a basic demo here; instead I go straight to the analysis approach and the source code for scraping Yimutian.
Analysis approach for the Yimutian crawler
First open the Yimutian production-area price page, http://hangqing.ymt.com/chandi, which lists the agricultural-product categories.
Click the fruit category to see its many sub-categories; click "pear" to reach the pear price page, which offers all varieties plus a region selector. Choosing a province shows that day's prices and the trend over the past month.
This chain of pages is exactly the order in which the crawler collects the data; a quick way to verify the XPath expressions before writing the spiders is sketched below.
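Before writing the spiders it helps to confirm the XPath expressions against the live pages. Here is a minimal sketch using requests and parsel (the selector library Scrapy itself uses); the XPath is the same big-category expression used in hangqing.py further down, and the printed output is only illustrative, since the page layout may have changed since this was written.

# Quick check of the big-category XPath outside Scrapy (illustrative only)
import requests
from parsel import Selector

resp = requests.get("http://hangqing.ymt.com/", timeout=10)
resp.encoding = resp.apparent_encoding  # make sure Chinese category names decode correctly
sel = Selector(text=resp.text)

for a in sel.xpath("//div[@id='purchase_wrapper']/div//a[@class='hide']"):
    name = a.xpath("./text()").get()
    href = a.xpath("./@href").get()
    print(name, href)  # category name and its nav_chandi_... link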
Yimutian crawler source code
1. First create a Spider
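(Presumably the usual Scrapy setup: something like `scrapy startproject mySpider` — the `mySpider` package that the imports below refer to — followed by `scrapy genspider hangqing hangqing.ymt.com` and the like for each of the spiders that follow.)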
2. Price (hangqing) data
Scrape the big, medium, and small categories and the varieties — hangqing.py
import scrapy
from mySpider.items import MyspiderItem
from copy import deepcopy
import time


class HangqingSpider(scrapy.Spider):
    name = "hangqing"
    allowed_domains = ["hangqing.ymt.com"]
    start_urls = (
        'http://hangqing.ymt.com/',
    )

    # Big (top-level) categories
    def parse(self, response):
        a_list = response.xpath("//div[@id='purchase_wrapper']/div//a[@class='hide']")
        for a in a_list:
            items = MyspiderItem()
            items["ymt_bigsort_href"] = a.xpath("./@href").extract_first()
            items["ymt_bigsort_id"] = items["ymt_bigsort_href"].replace("http://hangqing.ymt.com/common/nav_chandi_", "")
            items["ymt_bigsort_name"] = a.xpath("./text()").extract_first()
            # Request the detail page of this big category
            yield scrapy.Request(
                items["ymt_bigsort_href"],
                callback=self.parse_medium_detail,
                meta={"item": deepcopy(items)}
            )
            # Request the next page (use xpath to get the next-page url)
            # next_url = response.xpath("xpath of the next page")
            # if next_url is not None:
            #     yield scrapy.Request(
            #         next_url,
            #         callback=self.parse
            #     )

    # Medium categories (the small categories live on the same page)
    def parse_medium_detail(self, response):
        items = response.meta["item"]
        li_list = response.xpath("//div[@class='cate_nav_wrap']//a")
        for li in li_list:
            items["ymt_mediumsort_id"] = li.xpath("./@data-id").extract_first()
            items["ymt_mediumsort_name"] = li.xpath("./text()").extract_first()
            yield scrapy.Request(
                items["ymt_bigsort_href"],
                callback=self.parse_small_detail,
                meta={"item": deepcopy(items)},
                dont_filter=True
            )

    # Small categories
    def parse_small_detail(self, response):
        item = response.meta["item"]
        mediumsort_id = item["ymt_mediumsort_id"]
        if int(mediumsort_id) > 0:
            nav_product_id = "nav-product-" + mediumsort_id
            a_list = response.xpath("//div[@class='cate_content_1']//div[contains(@class,'{}')]//ul//a".format(nav_product_id))
            for a in a_list:
                item["ymt_smallsort_id"] = a.xpath("./@data-id").extract_first()
                item["ymt_smallsort_href"] = a.xpath("./@href").extract_first()
                item["ymt_smallsort_name"] = a.xpath("./text()").extract_first()
                yield scrapy.Request(
                    item["ymt_smallsort_href"],
                    callback=self.parse_variety_detail,
                    meta={"item": deepcopy(item)}
                )

    # Varieties
    def parse_variety_detail(self, response):
        item = response.meta["item"]
        li_list = response.xpath("//ul[@class='all_cate clearfix']//li")
        if len(li_list) > 0:
            for li in li_list:
                item["ymt_breed_href"] = li.xpath("./a/@href").extract_first()
                item["ymt_breed_name"] = li.xpath("./a/text()").extract_first()
                item["ymt_breed_id"] = item["ymt_breed_href"].split("_")[2]
                # time.sleep(1)
                yield item
                # print(item)
        else:
            item["ymt_breed_href"] = ""
            item["ymt_breed_name"] = ""
            item["ymt_breed_id"] = -1
            # time.sleep(1)
            yield item
            # print(item)
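Two details in hangqing.py are worth calling out. parse_medium_detail issues one request per medium category to the same big-category URL, and Scrapy's built-in duplicate filter would drop all but the first of them; that is what dont_filter=True is for. And every request carries a deepcopy of the partially filled item in meta, so that items travelling down different category branches do not overwrite each other's fields while requests are in flight.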
3. Production-area (chandi) data
Scrape provinces, cities, and counties — chandi.py
# -*- coding: utf-8 -*-
import scrapy
from mySpider.items import MyspiderChanDi
from copy import deepcopy


class ChandiSpider(scrapy.Spider):
    name = 'chandi'
    allowed_domains = ['hangqing.ymt.com']
    start_urls = ['http://hangqing.ymt.com/chandi_8031_0_0']

    # Provinces
    def parse(self, response):
        # Production-area (province) list
        li_list = response.xpath("//div[@class='fl sku_name']/ul//li")
        for li in li_list:
            items = MyspiderChanDi()
            items["ymt_province_href"] = li.xpath("./a/@href").extract_first()
            items["ymt_province_id"] = items["ymt_province_href"].split("_")[-1]
            items["ymt_province_name"] = li.xpath("./a/text()").extract_first()
            yield scrapy.Request(
                items["ymt_province_href"],
                callback=self.parse_city_detail,
                meta={"item": deepcopy(items)}
            )

    # Cities
    def parse_city_detail(self, response):
        item = response.meta["item"]
        option = response.xpath("//select[@class='location_select'][1]//option")
        if len(option) > 0:
            for op in option:
                name = op.xpath("./text()").extract_first()
                if name != "全部":
                    item["ymt_city_name"] = name
                    item["ymt_city_href"] = op.xpath("./@data-url").extract_first()
                    item["ymt_city_id"] = item["ymt_city_href"].split("_")[-1]
                    yield scrapy.Request(
                        item["ymt_city_href"],
                        callback=self.parse_area_detail,
                        meta={"item": deepcopy(item)}
                    )
        else:
            # No city list on this page: yield the item directly with empty
            # city/county fields (a request to an empty url would fail)
            item["ymt_city_name"] = ""
            item["ymt_city_href"] = ""
            item["ymt_city_id"] = 0
            item["ymt_area_name"] = ""
            item["ymt_area_href"] = ""
            item["ymt_area_id"] = 0
            yield item

    # Counties
    def parse_area_detail(self, response):
        item = response.meta["item"]
        area_list = response.xpath("//select[@class='location_select'][2]//option")
        if len(area_list) > 0:
            for area in area_list:
                name = area.xpath("./text()").extract_first()
                if name != "全部":
                    item["ymt_area_name"] = name
                    item["ymt_area_href"] = area.xpath("./@data-url").extract_first()
                    item["ymt_area_id"] = item["ymt_area_href"].split("_")[-1]
                    yield item
        else:
            item["ymt_area_name"] = ""
            item["ymt_area_href"] = ""
            item["ymt_area_id"] = 0
            yield item
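The chandi spider starts from a single small-category page (chandi_8031_0_0) rather than from the home page; the province, city, and county selectors it parses appear to be the same on every small-category page, so one page is enough to enumerate all production areas.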
4. Price distribution by location
location_char.py
# -*- coding: utf-8 -*-
import scrapy
import pymysql
import json
from copy import deepcopy
from mySpider.items import MySpiderSmallProvincePrice
import datetime


class LocationCharSpider(scrapy.Spider):
    name = 'location_char'
    allowed_domains = ['hangqing.ymt.com']
    start_urls = ['http://hangqing.ymt.com/']

    i = datetime.datetime.now()
    dateKey = str(i.year) + str(i.month) + str(i.day)
    db = pymysql.connect(
        host="127.0.0.1", port=3306,
        user='root', password='mysql',
        db='ymt_db', charset='utf8'
    )

    def parse(self, response):
        cur = self.db.cursor()
        location_char_sql = "select small_id from ymt_price_small where dateKey = {} and day_avg_price > 0".format(self.dateKey)
        cur.execute(location_char_sql)
        location_chars = cur.fetchall()
        for ch in location_chars:
            item = MySpiderSmallProvincePrice()
            item["small_id"] = ch[0]
            location_char_url = "http://hangqing.ymt.com/chandi/location_charts"
            small_id = str(item["small_id"])
            form_data = {
                "locationId": "0",
                "productId": small_id,
                "breedId": "0"
            }
            yield scrapy.FormRequest(
                location_char_url,
                formdata=form_data,
                callback=self.location_char,
                meta={"item": deepcopy(item)}
            )

    def location_char(self, response):
        item = response.meta["item"]
        html_str = json.loads(response.text)
        status = html_str["status"]
        if status == 0:
            item["unit"] = html_str["data"]["unit"]
            item["dateKey"] = self.dateKey
            dataList = html_str["data"]["dataList"]
            for data in dataList:
                if type(data) == type([]):
                    item["province_name"] = data[0]
                    item["province_price"] = data[1]
                elif type(data) == type({}):
                    item["province_name"] = data["name"]
                    item["province_price"] = data["y"]
                location_char_url = "http://hangqing.ymt.com/chandi/location_charts"
                small_id = str(item["small_id"])
                province_name = str(item["province_name"])
                province_id_sql = "select province_id from ymt_1_dim_cdProvince where province_name = \"{}\"".format(province_name)
                cur = self.db.cursor()
                cur.execute(province_id_sql)
                province_id = cur.fetchone()
                item["province_id"] = province_id[0]
                province_id = str(province_id[0])
                form_data = {
                    "locationId": province_id,
                    "productId": small_id,
                    "breedId": "0"
                }
                yield scrapy.FormRequest(
                    location_char_url,
                    formdata=form_data,
                    callback=self.location_char_province,
                    meta={"item": deepcopy(item)}
                )

    def location_char_province(self, response):
        item = response.meta["item"]
        html_str = json.loads(response.text)
        status = html_str["status"]
        if status == 0:
            dataList = html_str["data"]["dataList"]
            for data in dataList:
                if type(data) == type([]):
                    item["city_name"] = data[0]
                    item["city_price"] = data[1]
                elif type(data) == type({}):
                    item["city_name"] = data["name"]
                    item["city_price"] = data["y"]
                location_char_url = "http://hangqing.ymt.com/chandi/location_charts"
                small_id = str(item["small_id"])
                city_name = str(item["city_name"])
                city_id_sql = "select city_id from ymt_1_dim_cdCity where city_name = \"{}\"".format(city_name)
                cur = self.db.cursor()
                cur.execute(city_id_sql)
                city_id = cur.fetchone()
                item["city_id"] = city_id[0]
                city_id = str(city_id[0])
                form_data = {
                    "locationId": city_id,
                    "productId": small_id,
                    "breedId": "0"
                }
                yield scrapy.FormRequest(
                    location_char_url,
                    formdata=form_data,
                    callback=self.location_char_province_city,
                    meta={"item": deepcopy(item)}
                )

    def location_char_province_city(self, response):
        item = response.meta["item"]
        html_str = json.loads(response.text)
        status = html_str["status"]
        if status == 0:
            dataList = html_str["data"]["dataList"]
            for data in dataList:
                if type(data) == type([]):
                    item["area_name"] = data[0]
                    item["area_price"] = data[1]
                elif type(data) == type({}):
                    item["area_name"] = data["name"]
                    item["area_price"] = data["y"]
                area_name = item["area_name"]
                area_id_sql = "select area_id from ymt_1_dim_cdArea where area_name = \"{}\"".format(area_name)
                cur1 = self.db.cursor()
                cur1.execute(area_id_sql)
                area_id = cur1.fetchone()
                item["area_id"] = area_id[0]
                breed_id_sql = "select breed_id from ymt_all_info_sort where small_id = {} and breed_id > 0".format(item["small_id"])
                cur1.execute(breed_id_sql)
                breed_ids = cur1.fetchall()
                # print(len(breed_ids))
                location_char_url = "http://hangqing.ymt.com/chandi/location_charts"
                for breed_id in breed_ids:
                    item["breed_id"] = breed_id[0]
                    form_data = {
                        "locationId": str(item["city_id"]),
                        "productId": str(item["small_id"]),
                        "breedId": str(breed_id[0])
                    }
                    # print(form_data, breed_id)
                    yield scrapy.FormRequest(
                        location_char_url,
                        formdata=form_data,
                        callback=self.location_char_province_city_breed,
                        meta={"item": deepcopy(item)}
                    )

    def location_char_province_city_breed(self, response):
        item = response.meta["item"]
        html_str = json.loads(response.text)
        status = html_str["status"]
        if status == 0:
            dataList = html_str["data"]["dataList"]
            for data in dataList:
                if type(data) == type([]):
                    item["breed_city_name"] = data[0]
                    item["breed_city_price"] = data[1]
                elif type(data) == type({}):
                    item["breed_city_name"] = data["name"]
                    item["breed_city_price"] = data["y"]
                # print(item)
                yield item
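Note that parse reads its small_id values from the ymt_price_small table, which is only filled once the pricedata spider in the next step has run (and ymt_all_info_sort is filled by the hangqing spider), so the spiders have to be run in that order. The location_charts endpoint itself is a plain form POST returning JSON; below is a minimal sketch with requests that mirrors the request the spider builds and the fields it reads (status, data.unit, data.dataList). The productId value is only an example, not one taken from the article.

# Reproduce the location_charts POST outside Scrapy (illustrative only;
# "8031" is just an example small-category id)
import requests

resp = requests.post(
    "http://hangqing.ymt.com/chandi/location_charts",
    data={"locationId": "0", "productId": "8031", "breedId": "0"},
    timeout=10,
)
payload = resp.json()
if payload["status"] == 0:                      # 0 means success, as in the spider
    print(payload["data"]["unit"])              # price unit
    for row in payload["data"]["dataList"]:     # entries are lists or dicts, as handled above
        name = row[0] if isinstance(row, list) else row["name"]
        price = row[1] if isinstance(row, list) else row["y"]
        print(name, price)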
5. Price trend
pricedata.py
# -*- coding: utf-8 -*-
import scrapy
import pymysql.cursors
from copy import deepcopy
from mySpider.items import MySpiderSmallprice
import datetime
import json


class PricedataSpider(scrapy.Spider):
    name = 'pricedata'
    allowed_domains = ['hangqing.ymt.com']
    start_urls = ['http://hangqing.ymt.com/chandi_8031_0_0']

    i = datetime.datetime.now()

    def parse(self, response):
        db = pymysql.connect(
            host="127.0.0.1", port=3306,
            user='root', password='mysql',
            db='ymt_db', charset='utf8'
        )
        cur = db.cursor()
        all_small_sql = "select distinct small_id, small_name, small_href from ymt_all_info_sort"
        cur.execute(all_small_sql)
        small_all = cur.fetchall()
        for small in small_all:
            item = MySpiderSmallprice()
            item["small_href"] = small[2]
            # item["small_name"] = small[1]
            item["small_id"] = small[0]
            yield scrapy.Request(
                item["small_href"],
                callback=self.small_breed_info,
                meta={"item": deepcopy(item)}
            )

    def small_breed_info(self, response):
        item = response.meta["item"]
        item["day_avg_price"] = response.xpath("//dd[@class='c_origin_price']/p[2]//span[1]/text()").extract_first()
        item["unit"] = response.xpath("//dd[@class='c_origin_price']/p[2]//span[2]/text()").extract_first()
        item["dateKey"] = str(self.i.year) + str(self.i.month) + str(self.i.day)
        if item["day_avg_price"] is None:
            item["day_avg_price"] = 0
            item["unit"] = ""
        yield item
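One caveat with the dateKey built here (and in location_char above): str(i.month) and str(i.day) are not zero-padded, so for example 2017-1-11 and 2017-11-1 both yield "2017111". Formatting the date with i.strftime("%Y%m%d") in both spiders would make the key unambiguous; that is a suggested change, not what the code above does.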
6. Item definitions
items.py
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy


# Fields for the category/price (hangqing) spider
class MyspiderItem(scrapy.Item):
    ymt_bigsort_href = scrapy.Field()
    ymt_bigsort_id = scrapy.Field()
    ymt_bigsort_name = scrapy.Field()
    ymt_mediumsort_id = scrapy.Field()
    ymt_mediumsort_name = scrapy.Field()
    ymt_smallsort_id = scrapy.Field()
    ymt_smallsort_href = scrapy.Field()
    ymt_smallsort_name = scrapy.Field()
    ymt_breed_id = scrapy.Field()
    ymt_breed_name = scrapy.Field()
    ymt_breed_href = scrapy.Field()


# Fields for the production-area (chandi) spider
class MyspiderChanDi(scrapy.Item):
    ymt_province_id = scrapy.Field()
    ymt_province_name = scrapy.Field()
    ymt_province_href = scrapy.Field()
    ymt_city_id = scrapy.Field()
    ymt_city_name = scrapy.Field()
    ymt_city_href = scrapy.Field()
    ymt_area_id = scrapy.Field()
    ymt_area_name = scrapy.Field()
    ymt_area_href = scrapy.Field()


# Production-area price of a small category
class MySpiderSmallprice(scrapy.Item):
    small_href = scrapy.Field()
    small_id = scrapy.Field()
    day_avg_price = scrapy.Field()
    unit = scrapy.Field()
    dateKey = scrapy.Field()


# Province / city / county prices of a small category
class MySpiderSmallProvincePrice(scrapy.Item):
    small_id = scrapy.Field()
    unit = scrapy.Field()
    province_name = scrapy.Field()
    province_price = scrapy.Field()   # average price in the province
    province_id = scrapy.Field()
    city_name = scrapy.Field()
    city_price = scrapy.Field()       # average price in the city
    city_id = scrapy.Field()
    area_name = scrapy.Field()
    area_price = scrapy.Field()       # average price in the county
    area_id = scrapy.Field()
    breed_city_name = scrapy.Field()
    breed_city_price = scrapy.Field()
    breed_id = scrapy.Field()
    dateKey = scrapy.Field()
7. Writing the data to the database
pipelines.py
# -*- coding: utf-8 -*-
from pymongo import MongoClient
import pymysql.cursors


class MyspiderPipeline(object):
    def open_spider(self, spider):
        # client = MongoClient(host=spider.settings["MONGO_HOST"], port=spider.settings["MONGO_PORT"])
        # self.collection = client["ymt"]["hangqing"]
        pass

    def process_item(self, item, spider):
        db = pymysql.connect(
            host="127.0.0.1", port=3306,
            user='root', password='mysql',
            db='ymt_db', charset='utf8'
        )
        cur = db.cursor()

        if spider.name == "hangqing":
            # All category data
            all_sort_sql = "insert into ymt_all_info_sort(big_id, big_name, big_href, " \
                           "medium_id, medium_name, " \
                           "small_id, small_name, small_href, " \
                           "breed_id, breed_name, breed_href) " \
                           "VALUES({},\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\",\"{}\")".format(
                item["ymt_bigsort_id"], item["ymt_bigsort_name"], item["ymt_bigsort_href"],
                item["ymt_mediumsort_id"], item["ymt_mediumsort_name"],
                item["ymt_smallsort_id"], item["ymt_smallsort_name"], item["ymt_smallsort_href"],
                item["ymt_breed_id"], item["ymt_breed_name"], item["ymt_breed_href"])
            try:
                cur.execute(all_sort_sql)
                db.commit()
            except Exception as e:
                db.rollback()
            finally:
                cur.close()
                db.close()
            return item

        elif spider.name == "chandi":
            # All production-area data
            all_cd_sql = "insert into ymt_all_info_cd(" \
                         "province_id, province_name, province_href, " \
                         "city_id, city_name, city_href, " \
                         "area_id, area_name, area_href) " \
                         "VALUES({},\"{}\",\"{}\",{},\"{}\",\"{}\",{},\"{}\",\"{}\")".format(
                item["ymt_province_id"], item["ymt_province_name"], item["ymt_province_href"],
                item["ymt_city_id"], item["ymt_city_name"], item["ymt_city_href"],
                item["ymt_area_id"], item["ymt_area_name"], item["ymt_area_href"])
            try:
                # Production-area data
                cur.execute(all_cd_sql)
                db.commit()
            except Exception as e:
                db.rollback()
            finally:
                cur.close()
                db.close()
            return item

        elif spider.name == "pricedata":
            avg_day_price_sql = "insert into ymt_price_small(small_href, small_id, day_avg_price, unit, dateKey) " \
                                "VALUES(\"{}\",{},{},\"{}\",\"{}\")".format(
                item["small_href"], item["small_id"], item["day_avg_price"], item["unit"], item["dateKey"])
            try:
                cur.execute(avg_day_price_sql)
                db.commit()
            except Exception as e:
                db.rollback()
            finally:
                cur.close()
                db.close()

        elif spider.name == "location_char":
            location_char_sql = "insert into ymt_price_provice(small_id, province_name, provice_price, city_name, city_price, " \
                                "area_name, area_price, unit, dateKey, area_id, city_id, provice_id, " \
                                "breed_city_name, breed_city_price, breed_id) " \
                                "VALUES({},\"{}\",{},\"{}\",{},\"{}\",{},\"{}\",{},{},{},{},\"{}\",{},{})".format(
                item["small_id"], item["province_name"], item["province_price"], item["city_name"], item["city_price"],
                item["area_name"], item["area_price"], item["unit"], item["dateKey"],
                item["area_id"], item["city_id"], item["province_id"],
                item["breed_city_name"], item["breed_city_price"], item["breed_id"])
            try:
                cur.execute(location_char_sql)
                db.commit()
            except Exception as e:
                db.rollback()
            finally:
                cur.close()
                db.close()

        else:
            cur.close()
            db.close()
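The pipeline only receives items once it is enabled in the project settings. A minimal sketch, assuming the project package is mySpider and the class name shown above:

# mySpider/settings.py (excerpt) — enable the pipeline; the number is the usual
# Scrapy priority value, lower runs earlier
ITEM_PIPELINES = {
    "mySpider.pipelines.MyspiderPipeline": 300,
}

# Optional MongoDB settings referenced by the commented-out code in open_spider
MONGO_HOST = "127.0.0.1"
MONGO_PORT = 27017

Also worth noting: building the SQL strings with str.format breaks as soon as a value contains a double quote and is open to SQL injection; pymysql's parameterized cur.execute(sql, args) form with %s placeholders avoids both.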
Out of personal interest, I eventually turned the scraped agricultural-product data into a web system.