由於身邊同事經常買雙色球,時間長了也就慢慢關注這個,我們中午經常也一塊去吃飯,然後去彩票站點。之前是在支付寶上面就可以買,那會自己也會偶爾買10元的。這篇文章主要是爬取了歷史雙色球所有數據,並進行簡單分析,純屬業餘愛好,分析結果僅供參考。
1.數據爬取網頁:歷史雙色球數據
#After inspecting the page, this is the GET URL (including its `start` query
#parameter) used to request all of the historical draw data.
url='https://datachart.500.com/ssq/history/newinc/history.php?start=03001'
#Load the required libraries
import requests
import numpy as np
import pandas as pd
#Fetch the full history of Double Color Ball draws.
#A timeout keeps the request from hanging forever, and raise_for_status()
#stops immediately on an HTTP error instead of trying to parse an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
response.encoding = 'utf-8'
re_text = response.text

#Parse the page: keep only the body of the <tbody id="tdata"> table, then cut
#it into one chunk per <tr class="t_tr1"> row.
#NOTE: the variable name `re` is kept for compatibility with the original
#script; it would shadow the stdlib `re` module if that were ever imported.
re = re_text.split('<tbody id="tdata">')[1].split('</tbody>')[0]
result = re.split('<tr class="t_tr1">')[1:]

all_numbers = []
for row_html in result:
    #Drop the commented-out cell so it is not counted as a real column.
    row_html = row_html.replace('<!--<td>2</td>-->', '')
    #Everything after the last </td> is the row's closing markup, not a cell.
    cells = row_html.split('</td>')[:-1]
    #The cell text is whatever follows the first '>' of the opening <td ...>.
    #The HTML is handled as raw text, so entities are never decoded: strip a
    #literal '&nbsp;' (if present) as well as plain spaces.
    all_numbers.append(
        [cell.split('>')[1].replace('&nbsp;', '').replace(' ', '') for cell in cells]
    )

#Column names, in the order the cells appear in each table row
col = ['期號(hào)', '紅球1', '紅球2', '紅球3', '紅球4', '紅球5', '紅球6', '藍(lán)球', '快樂星期天', '獎(jiǎng)池獎(jiǎng)金(元)',
       '一等獎(jiǎng)注數(shù)', '一等獎(jiǎng)獎(jiǎng)金(元)', '二等獎(jiǎng)注數(shù)', '二等獎(jiǎng)獎(jiǎng)金(元)', '總投注額(元)', '開獎(jiǎng)日期']

#Build the draw DataFrame from the parsed rows
df_all = pd.DataFrame(all_numbers, columns=col)
df_all.head()
2.數據轉換
#Date conversion: parse the draw-date strings once, then derive the calendar
#parts from the parsed datetime column.
df_all['開獎(jiǎng)日期_dt'] = pd.to_datetime(df_all['開獎(jiǎng)日期'])
df_all['year'] = df_all['開獎(jiǎng)日期_dt'].dt.year
df_all['month'] = df_all['開獎(jiǎng)日期_dt'].dt.month
df_all['day'] = df_all['開獎(jiǎng)日期_dt'].dt.day
#`.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` is the
#replacement accessor for the weekday name.
df_all['weekday'] = df_all['開獎(jiǎng)日期_dt'].dt.day_name()
df_all.head()
#Custom one-hot encoding helper
def lotterydata(df):
    """Return a copy of *df* with 0/1 indicator columns for every ball.

    Appends 33 red-ball columns ('紅球01' .. '紅球33') followed by 16
    blue-ball columns ('藍(lán)球01' .. '藍(lán)球16'). A red indicator is 1 when
    any of the six drawn-red columns ('紅球1' .. '紅球6') of that row equals
    the two-digit code; a blue indicator is 1 when the row's '藍(lán)球' value
    equals the code. Codes are compared as zero-padded strings ('01', '02',
    ...), so the drawn-ball columns must hold strings in that form.

    Args:
        df: DataFrame containing the six drawn-red columns and the blue
            column. It is not modified.

    Returns:
        A new DataFrame with the original columns plus the 49 indicators,
        in the order described above.

    Raises:
        KeyError: if one of the drawn-ball columns is missing.
    """
    modeldata = df.copy()

    # Snapshot the drawn-ball columns once. Comparing whole columns avoids
    # mixing positional row numbers with label-based `.loc` writes, which
    # adds phantom rows whenever the index is not 0..n-1, and is much faster
    # than per-cell lookups.
    red_drawn = modeldata[['紅球%d' % k for k in range(1, 7)]]
    blue_drawn = modeldata['藍(lán)球']

    for number in range(1, 34):
        code = '%02d' % number
        modeldata['紅球' + code] = red_drawn.eq(code).any(axis=1).astype(int)

    for number in range(1, 17):
        code = '%02d' % number
        modeldata['藍(lán)球' + code] = blue_drawn.eq(code).astype(int)

    return modeldata
#Build the 0/1 encoding columns for the balls of each colour
modeldata=lotterydata(df_all)
#Show the first rows of the encoded frame
modeldata.head()
3.數據分析與展示
#Keep only the trailing 49 columns of modeldata (33 red + 16 blue indicators)
allhistorydata = modeldata.iloc[:, -49:].copy()

#Whole history, split by colour: first 33 columns are red, last 16 are blue
allhistorydata_red = allhistorydata.iloc[:, :33]
allhistorydata_blue = allhistorydata.iloc[:, -16:]

#Red balls over the first 20 rows and blue balls over the first 48 rows.
#The original comments call these the most recent 20 / 48 draws, which
#assumes the rows are ordered newest-first -- TODO confirm.
#(33*3)/6: draws needed for each red ball to get 3 chances; taken as 20 here
#(16*3)/1: draws needed for each blue ball to get 3 chances
recently20_red = allhistorydata.iloc[:20, :33]
recently48_blue = allhistorydata.iloc[:48, -16:]

#Count appearances per ball, then order from least to most frequent
historyred_sum = allhistorydata_red.sum().sort_values(ascending=True)
historyblue_sum = allhistorydata_blue.sum().sort_values(ascending=True)
recently20red_sum = recently20_red.sum().sort_values(ascending=True)
recently48blue_sum = recently48_blue.sum().sort_values(ascending=True)
#數(shù)據(jù)展示
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['font.sans-serif'] = ['SimHei'] #顯示中文
plt.figure(figsize=(30,24),facecolor='snow')
#歷史出現(xiàn)次數(shù)最少的10個(gè)紅球
x_red=historyred_sum.index.map(lambda x:x[-2:])[:10]
y_red=historyred_sum.values[:10]
#歷史出現(xiàn)次數(shù)最少的5個(gè)藍(lán)球
x_blue=historyblue_sum.index.map(lambda x:x[-2:])[:5]
y_blue=historyblue_sum.values[:5]
plt.subplot(3,2,1)
plt.bar(x_red,y_red,width=0.4,align='center',color='r')
for a,b in zip(x_red,y_red):
plt.text(a,b,b,ha='center',va='bottom',fontsize=15)
plt.tick_params(axis='x',labelsize=30)
plt.title("歷史出現(xiàn)次數(shù)最少的10個(gè)紅球",fontsize=30)
plt.subplot(3,2,2)
plt.bar(x_blue,y_blue,width=0.2,align='center',color='b')
for a,b in zip(x_blue,y_blue):
plt.text(a,b,b,ha='center',va='bottom',fontsize=15)
plt.tick_params(axis='x',labelsize=30)
plt.title("歷史出現(xiàn)次數(shù)最少的5個(gè)藍(lán)球",fontsize=30)
#最近20期紅球
x20_red=recently20red_sum.index.map(lambda x:x[-2:])
y20_red=recently20red_sum.values
#最近48期藍(lán)球
x48_blue=recently48blue_sum.index.map(lambda x:x[-2:])
y48_blue=recently48blue_sum.values
plt.subplot(3,1,2)
plt.bar(x20_red,y20_red,width=0.5,align='center',color='r')
for a,b in zip(x20_red,y20_red):
plt.text(a,b,b,ha='center',va='bottom',fontsize=15)
plt.tick_params(axis='x',labelsize=25)
plt.title("最近20期紅球情況",fontsize=30)
plt.subplot(3,1,3)
plt.bar(x48_blue,y48_blue,width=0.5,align='center',color='b')
for a,b in zip(x20_blue,y20_blue):
plt.text(a,b,b,ha='center',va='bottom',fontsize=15)
plt.tick_params(axis='x',labelsize=25)
plt.title("最近48期藍(lán)球情況",fontsize=30)
plt.show()
最終的數據展示結果,僅供參考!!!