Ajax = 異步JavaScript和XML(標準通用標記語言的子集)
Ajax 是一種用于創建快速動態網頁的技術。
Ajax是一種在無需重新加載整個網頁的情況下,能夠更新部分網頁的技術。
對于使用Ajax返回的數據,我們通常有兩種方式采集數據:
- 使用自動化測試工具chromedriver進行采集
- 通過抓包找到網頁發送Ajax請求并返回的數據
目標網址:全球視野的中文財經網站fx168
目標數據:采集美元指數、上證指數、深證成指、恒生指數、現貨黃金、布蘭特原油、標普500、離岸匯率的每日價格及漲跌幅
- 使用自動化測試工具來抓取數據信息
import requests
import pymongo
import datetime
from lxml import etree
from selenium import webdriver
from common.pgutils import get_conn
# Statement used by main() to empty the target table before a fresh load.
sql_truncate = "truncate TABLE public.news_morning_code"
# Parameterised insert used by insert_data(); values are bound by the driver.
sql = "insert into public.news_morning_code(TradeName,TradePrice,ChangePre,create_time) values(%s,%s,%s,%s)"


def insert_data(conn, name, present_price, rise_fall):
    """Parse one scraped quote and insert it into news_morning_code.

    Args:
        conn: Database connection exposing ``cursor()`` as a context manager.
        name: Instrument name stored in ``TradeName``.
        present_price: Current price as text (or a number). Surrounding
            whitespace and thousands separators are tolerated.
        rise_fall: Day-change text whose parenthesised part holds the
            percentage, e.g. ``"12.30(0.37%)"``. Only the percentage is
            stored, in ``ChangePre``.

    Raises:
        ValueError: If ``rise_fall`` has no ``(`` or a value is not numeric.
    """
    current_date = datetime.datetime.now()
    price = float(str(present_price).replace(',', '').strip())

    # Keep only what follows the first "(" and peel off the closing ")" and
    # "%" explicitly, so trailing whitespace in the scraped text cannot shift
    # the slice the way a fixed "[:-2]" would.
    _, paren, tail = str(rise_fall).partition('(')
    if not paren:
        raise ValueError('rise_fall has no parenthesised percentage: %r' % (rise_fall,))
    percent_text = tail.strip().rstrip(')').strip().rstrip('%').replace(',', '')
    change_percent = float(percent_text)

    with conn.cursor() as cur:
        cur.execute(sql, [name, price, change_percent, current_date])
def get_usdcny(conn):
    """Scrape the USD/CNY exchange-rate quote from fx168 and store it.

    Opens http://quote.fx168.com/USDCNY/ in a Chrome WebDriver session, parses
    the rendered page with lxml, prints the extracted values and passes them
    to ``insert_data``.

    NOTE(review): the original docstring called this the offshore rate while
    the success message below says onshore; confirm which one the USDCNY page
    actually represents.

    Args:
        conn: Database connection handed through to ``insert_data``.

    Raises:
        IndexError: If an expected element is missing from the page.
    """
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.get('http://quote.fx168.com/USDCNY/')
        # NOTE(review): page_source is read right after get() with no explicit
        # wait; confirm the Ajax-filled values are present by then.
        xml = etree.HTML(driver.page_source)
        datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        name = datas.xpath('./h2/span/text()')[0]
        # Current price.
        present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')
        # Day's rise/fall text.
        rise_fall = datas.xpath("./h3/b/text()")[0]
        print(name, present_price, rise_fall)
        insert_data(conn, name, present_price, rise_fall)
    finally:
        # Always end the browser session, even when parsing or the insert fails.
        driver.quit()
    print('在岸匯率存儲(chǔ)成功')
def get_shcomp(conn):
    """Scrape the Shanghai Composite Index quote from fx168 and store it.

    Opens http://quote.fx168.com/SHCOMP/ in a Chrome WebDriver session, parses
    the rendered page with lxml, prints the extracted values and passes them
    to ``insert_data``.

    Args:
        conn: Database connection handed through to ``insert_data``.

    Raises:
        IndexError: If an expected element is missing from the page.
    """
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.get('http://quote.fx168.com/SHCOMP/')
        xml = etree.HTML(driver.page_source)
        datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        name = datas.xpath('./h2/span/text()')[0]
        # Current price.
        present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')
        # Day's rise/fall text.
        rise_fall = datas.xpath("./h3/b/text()")[0]
        print(name, present_price, rise_fall)
        insert_data(conn, name, present_price, rise_fall)
    finally:
        # Always end the browser session, even when parsing or the insert fails.
        driver.quit()
    print('上證綜指存儲(chǔ)成功')
def get_szcomp(conn):
    """Scrape the Shenzhen Component Index quote from fx168 and store it.

    Opens http://quote.fx168.com/SZCOMP/ in a Chrome WebDriver session, parses
    the rendered page with lxml, prints the extracted values and passes them
    to ``insert_data``.

    Args:
        conn: Database connection handed through to ``insert_data``.

    Raises:
        IndexError: If an expected element is missing from the page.
    """
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.get('http://quote.fx168.com/SZCOMP/')
        xml = etree.HTML(driver.page_source)
        datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        name = datas.xpath('./h2/span/text()')[0]
        # Current price.
        present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')
        # Day's rise/fall text.
        rise_fall = datas.xpath("./h3/b/text()")[0]
        print(name, present_price, rise_fall)
        insert_data(conn, name, present_price, rise_fall)
    finally:
        # Always end the browser session, even when parsing or the insert fails.
        driver.quit()
    print('深圳成指存儲(chǔ)成功')
def get_hsi(conn):
    """Scrape the Hang Seng Index quote from fx168 and store it.

    Opens http://quote.fx168.com/HSI/ in a Chrome WebDriver session, parses
    the rendered page with lxml, prints the extracted values and passes them
    to ``insert_data``.

    Args:
        conn: Database connection handed through to ``insert_data``.

    Raises:
        IndexError: If an expected element is missing from the page.
    """
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.get('http://quote.fx168.com/HSI/')
        xml = etree.HTML(driver.page_source)
        datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        name = datas.xpath('./h2/span/text()')[0]
        # Current price.
        present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')
        # Day's rise/fall text.
        rise_fall = datas.xpath("./h3/b/text()")[0]
        print(name, present_price, rise_fall)
        insert_data(conn, name, present_price, rise_fall)
    finally:
        # Always end the browser session, even when parsing or the insert fails.
        driver.quit()
    print('恒生指數(shù)存儲(chǔ)成功')
def get_dini(conn):
    """Scrape the US Dollar Index quote from fx168 and store it.

    Opens http://quote.fx168.com/DINI/ in a Chrome WebDriver session, parses
    the rendered page with lxml, prints the extracted values and passes them
    to ``insert_data``.

    Args:
        conn: Database connection handed through to ``insert_data``.

    Raises:
        IndexError: If an expected element is missing from the page.
    """
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.get('http://quote.fx168.com/DINI/')
        xml = etree.HTML(driver.page_source)
        datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        name = datas.xpath('./h2/span/text()')[0]
        # Current price.
        present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')
        # Day's rise/fall text.
        rise_fall = datas.xpath("./h3/b/text()")[0]
        print(name, present_price, rise_fall)
        insert_data(conn, name, present_price, rise_fall)
    finally:
        # Always end the browser session, even when parsing or the insert fails.
        driver.quit()
    print('美元指數(shù)存儲(chǔ)成功')
def get_xau(conn):
    """Scrape the spot gold quote from fx168 and store it.

    Opens http://quote.fx168.com/XAU/ in a Chrome WebDriver session, parses
    the rendered page with lxml, prints the extracted values and passes them
    to ``insert_data``.

    Args:
        conn: Database connection handed through to ``insert_data``.

    Raises:
        IndexError: If an expected element is missing from the page.
    """
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.get('http://quote.fx168.com/XAU/')
        xml = etree.HTML(driver.page_source)
        datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        name = datas.xpath('./h2/span/text()')[0]
        # Current price.
        present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')
        # Day's rise/fall text.
        rise_fall = datas.xpath("./h3/b/text()")[0]
        print(name, present_price, rise_fall)
        insert_data(conn, name, present_price, rise_fall)
    finally:
        # Always end the browser session, even when parsing or the insert fails.
        driver.quit()
    print('現(xiàn)貨黃金存儲(chǔ)成功')
def get_spciw(conn):
    """Scrape the S&P 500 quote from fx168 and store it.

    Opens http://quote.fx168.com/SPCIW/ in a Chrome WebDriver session, parses
    the rendered page with lxml, prints the extracted values and passes them
    to ``insert_data``. Unlike the sibling scrapers, the instrument name is a
    fixed literal and the price is located through an absolute XPath.

    Args:
        conn: Database connection handed through to ``insert_data``.

    Raises:
        IndexError: If an expected element is missing from the page.
    """
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.get('http://quote.fx168.com/SPCIW/')
        xml = etree.HTML(driver.page_source)
        datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        # Current price, addressed by its absolute position in the document.
        present_price = xml.xpath(
            '/html/body/section[1]/div/div/div[2]/div[1]/div/div[1]/h3/span'
        )[0].xpath('string(.)')
        # Day's rise/fall text.
        rise_fall = datas.xpath("./h3/b/text()")[0]
        # The name is hard-coded rather than read from the page.
        name = '標(biāo)普500'
        print(name, present_price, rise_fall)
        insert_data(conn, name, present_price, rise_fall)
    finally:
        # Always end the browser session, even when parsing or the insert fails.
        driver.quit()
    print('標(biāo)普500存儲(chǔ)成功')
def get_brents(conn):
    """Scrape the Brent crude oil quote from fx168 and store it.

    Opens http://quote.fx168.com/BRENTS/ in a Chrome WebDriver session, parses
    the rendered page with lxml, prints the extracted values and passes them
    to ``insert_data``.

    Args:
        conn: Database connection handed through to ``insert_data``.

    Raises:
        IndexError: If an expected element is missing from the page.
    """
    chromedriver = r"/usr/local/share/chromedriver"
    driver = webdriver.Chrome(chromedriver)
    try:
        driver.get('http://quote.fx168.com/BRENTS/')
        xml = etree.HTML(driver.page_source)
        datas = xml.xpath(".//div[@class='yjl_fx168_Hangqing_dataDel_zuo']")[0]
        name = datas.xpath('./h2/span/text()')[0]
        # Current price.
        present_price = xml.xpath('//*[@id="hangh3"]/span')[0].xpath('string(.)')
        # Day's rise/fall text.
        rise_fall = datas.xpath("./h3/b/text()")[0]
        print(name, present_price, rise_fall)
        insert_data(conn, name, present_price, rise_fall)
    finally:
        # Always end the browser session, even when parsing or the insert fails.
        driver.quit()
    print('布蘭特油存儲(chǔ)成功')
def main():
    """Empty the quotes table and reload it by running every scraper once.

    The truncate and all scraper inserts run inside a single ``with conn:``
    block; the connection is closed afterwards whether or not a step failed.
    Presumably ``conn`` is a psycopg2 connection, in which case that block
    commits on success and rolls back on error -- confirm against get_conn.
    """
    # Scrapers are run in this fixed order, one after another.
    scrapers = (
        get_usdcny,
        get_shcomp,
        get_szcomp,
        get_hsi,
        get_dini,
        get_xau,
        get_spciw,
        get_brents,
    )
    conn = get_conn()
    try:
        with conn:
            with conn.cursor() as cur:
                cur.execute(sql_truncate)
            for scrape in scrapers:
                scrape(conn)
    finally:
        if conn:
            conn.close()


# Run the full scrape only when the file is executed as a script.
if __name__ == '__main__':
    main()
- 分析網頁結構及數據返回的方法,采集數據
#!/usr/bin/env python3.4
# encoding: utf-8
"""
Created on 17-12-12
@author: Xu
"""
import json
import requests
import time
import datetime
from common.pgutils import get_conn
def get_json():
    """Fetch the fx168 quote feed and reload news_morning_code from it.

    Requests the JSONP endpoint directly, unwraps the callback, and inserts
    the selected entries of the response's ``List`` array after truncating
    the table. The truncate and the inserts share one ``with conn:`` block.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If the response is not a ``callback(...)`` wrapper or the
            wrapped payload is not valid JSON.
    """
    url = 'http://fx168api.fx168.com/InterfaceCollect/Default.aspx'
    my_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
        'Host': 'fx168api.fx168.com',
        'Referer': 'http://quote.fx168.com/BRENTS/',
    }
    current_date = datetime.datetime.now()
    # Sent as the query string. The original passed this dict as a GET body
    # (data=) while the URL carried a stale, hard-coded "_" timestamp.
    query = {
        'Code': 'fx168',
        'bCode': 'IQuoteDataALL',
        'succ_callback': 'show_whdata',
        '_': int(time.time()),
    }
    # A timeout keeps a stalled server from hanging the job indefinitely.
    cont = requests.get(url=url, headers=my_headers, params=query, timeout=30)
    cont.raise_for_status()
    cont.encoding = 'utf-8'
    result = cont.text

    # Unwrap "show_whdata(...)" by locating the parentheses instead of slicing
    # fixed offsets, so trailing whitespace or a ";" does not corrupt the JSON.
    start = result.find('(')
    end = result.rfind(')')
    if start == -1 or end <= start:
        raise ValueError('unexpected JSONP response: %r' % (result[:100],))
    info = json.loads(result[start + 1:end])['List']

    sql_truncate = "truncate TABLE public.news_morning_code"
    sql = "insert into public.news_morning_code(TradeName,TradePrice,ChangePre,create_time) values(%s,%s,%s,%s)"
    # Positions of the wanted instruments within the "List" array:
    # 0: US Dollar Index, 30: Shanghai Composite, 74: Shenzhen Component,
    # 76: Hang Seng Index, 19: spot gold, 73: Brent crude, 29: S&P 500,
    # 45: offshore exchange rate.
    # NOTE(review): these are positional; confirm the feed's ordering is stable.
    code_list = [0, 30, 74, 76, 19, 73, 29, 45]
    conn = get_conn()
    try:
        with conn:
            with conn.cursor() as cur:
                cur.execute(sql_truncate)
                for i in code_list:
                    row = info[i]
                    sql_params = [row['TradeName'], row['TradePrice'], row['ChangePre'], current_date]
                    cur.execute(sql, sql_params)
    finally:
        if conn:
            conn.close()


# Run the fetch-and-store job only when the file is executed as a script.
if __name__ == '__main__':
    get_json()
- 連接PostgreSQL數據庫,保存數據
#!/usr/bin/env python3.4
# encoding: utf-8
"""
Created on 17-12-12
@author: Xu
"""
import psycopg2
def get_conn():
    """Open and return a new psycopg2 connection.

    The settings below are placeholder values and must be replaced with the
    real database name, user, password and host before use.
    """
    settings = {
        'database': '數(shù)據(jù)庫(kù)名',
        'user': '用戶名',
        'password': '密碼',
        'host': 'ip地址',
        'port': '5432',
    }
    return psycopg2.connect(**settings)
這兩種方式從效率上來講顯然第二種更快捷。兩者的差別是:第一種需要解析HTML的結構取得數據,而第二種可以直接對返回的數據進行處理,進而保存我們想要的數據。