簡單爬蟲,爬取基金信息,使用了 MySQL 數據庫,代碼如下:
import pymysql
from urllib.error import HTTPError
from urllib.request import urlopen
from bs4 import BeautifulSoup
import uuid
import datetime
# Establish the database connection.
# `password` / `database` are the supported keyword names; `passwd` / `db`
# are deprecated aliases in PyMySQL. Selecting the `fund` schema at connect
# time replaces the former connect-to-`mysql`-then-`USE fund` round trip.
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment or a config file before this leaves a local machine.
conn = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='1234',
    database='fund',
    charset='utf8',
)
# Single module-level cursor shared by the helpers and the crawl loop below.
cur = conn.cursor()
def getFundNumStr(str):
    """Left-pad a fund number with zeros to form a six-character fund code.

    Args:
        str: Fund number as a string, e.g. ``"419"``. The parameter name
            shadows the builtin; it is kept so that existing callers
            (including keyword callers) keep working.

    Returns:
        The input padded on the left with ``"0"`` to a width of six
        characters. Inputs that are already six characters or longer are
        returned unchanged.
    """
    # rjust pads exactly like the former "prepend '0' in a loop" code,
    # without any special handling of sign characters.
    return str.rjust(6, "0")
# Fetch the data
def getFundData(url, timeout=10):
    """Download one fund page and extract its name, value and value time.

    Args:
        url: Address of the fund page to fetch.
        timeout: Seconds to wait on the network before giving up. Without
            a timeout a single stalled request would block the crawl
            forever.

    Returns:
        ``[name, value, time]`` as strings, or ``None`` when the page
        cannot be fetched or does not have the expected structure.
        ``name`` is the title text before the first ``"("``. ``value`` is
        the text of the element with id ``gz_gsz``. ``time`` is either the
        literal ``"--"`` or ``"20"`` followed by the text found between
        the parentheses of the element with id ``gz_gztime``
        (presumably a two-digit-year timestamp; confirm against the page).
    """
    # Open the link. HTTPError, URLError and socket timeouts are all
    # OSError subclasses, so any network-level failure skips this fund
    # instead of aborting the crawl. The response is closed by `with`.
    try:
        with urlopen(url, timeout=timeout) as response:
            page = response.read()
    except OSError:
        return None

    # Extract the target data. AttributeError means an expected element is
    # missing (find() returned None); ValueError means the parentheses the
    # slicing relies on are absent.
    try:
        soup = BeautifulSoup(page, "lxml")
        name = soup.find("div", {"class": "fundDetail-tit"}).div.get_text()
        name = name[:name.index("(")]
        value = soup.find("", {"id": "gz_gsz"}).get_text()
        quote_time = soup.find("", {"id": "gz_gztime"}).get_text()
        if quote_time != '--':
            start = quote_time.index("(") + 1
            end = quote_time.index(")")
            quote_time = "20" + quote_time[start:end]
    except (AttributeError, ValueError):
        return None
    return [name, value, quote_time]
# If this is a new fund, save the new fund's info
def saveNewFundInfo(code, name):
    """Insert a row into ``fund_info`` unless one with this code exists.

    Uses the module-level cursor ``cur``. The insert is not committed
    here; the caller is responsible for committing.

    Args:
        code: Fund code used as the lookup key.
        name: Fund name stored alongside the code for a new fund.
    """
    # Parameters must be a sequence: `(code)` is just a parenthesised
    # string, `(code,)` is the one-element tuple the driver expects.
    cur.execute("SELECT 1 FROM fund_info WHERE code = %s LIMIT 1", (code,))
    # fetchone() consumes the result and does not depend on the cursor
    # class reporting an accurate rowcount.
    if cur.fetchone() is None:
        cur.execute(
            "INSERT INTO fund_info (code,name) VALUES (%s, %s)",
            (code, name),
        )
# Crawl loop: probe every fund code, store what is found, and always release
# the database resources at the end.
# NOTE(review): the original indentation was lost, so the exact nesting of
# the commit and of the final timestamp print is reconstructed; confirm.
try:
    # Fund codes are six digits; the scan starts at 000419.
    for num in range(419, 1000000):
        funCode = getFundNumStr(str(num))
        url = "http://fund.eastmoney.com/" + funCode + ".html"
        data = getFundData(url)
        if data is None:
            # Page missing or not parseable: nothing to store for this code.
            continue

        name, value, quote_time = data
        saveNewFundInfo(funCode, name)

        # '--' is the placeholder text used when no value is available.
        if value != '--':
            try:
                amount = float(value)
            except ValueError:
                # Unexpected non-numeric text: skip the daily row rather
                # than abort the whole crawl.
                amount = None
            if amount is not None:
                # Same 32-hex-digit id as str(uuid1()).replace("-", "").
                row_id = uuid.uuid1().hex
                # Computed per row so a run that crosses midnight stamps
                # each row with the day it was actually written.
                today = datetime.datetime.now().strftime('%Y-%m-%d')
                cur.execute(
                    "INSERT INTO fund_day_data "
                    "(id,code,data,data_time,create_time,update_time) "
                    "VALUES (%s, %s,%s, %s, %s, %s)",
                    (row_id, funCode, amount, quote_time, today, today),
                )
        # Commit once per fund so the fund_info insert is persisted even
        # when there is no daily value to store.
        conn.commit()
    # NOTE(review): assumed to be a one-off completion timestamp.
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
finally:
    cur.close()
    conn.close()
最后編輯于 :
©著作權歸作者所有,轉載或內容合作請聯系作者