寫在前面:這個程序不是一個人能在短時間內(nèi)完成的,感謝達納、王哥的支持幫助,也感謝小平老師——沒有壓迫,就沒有項目。
簡介:這是一篇很硬核的Blog,有一定Python基礎(chǔ)的童鞋方能看懂。本程序的主要內(nèi)容是:首先通過Python的Selenium爬蟲對12306的圖形驗證碼進行批量爬取,然後通過Pillow對圖片的尺寸進行剪裁;將剪裁好的圖片分為文字和圖形兩部分並打上標簽,分別構(gòu)建卷積神經(jīng)網(wǎng)絡(luò)模型(CNN)對圖片進行學習;最後通過一系列的整合,用於12306自動搶票,並將成功信息通過郵件方式發(fā)送給用戶。
1执解、搭建12306爬蟲程序
#利用Selenium批量爬取驗證碼
#建立存儲目錄
# Batch-download 12306 captcha images with Selenium.
import time
import os
from lxml import etree
from urllib.request import urlopen

file_dir = r'E:\12306'  # directory where downloaded captchas are stored
if not os.path.exists(file_dir):
    os.mkdir(file_dir)  # create the directory on first run

import selenium.webdriver as wb

# Drive Chrome and open the 12306 login page.
br = wb.Chrome()
br.get('https://kyfw.12306.cn/otn/resources/login.html')

# Switch to the account-login tab, which displays the image captcha.
time.sleep(5)  # let the page render (also avoids hammering the site)
button = br.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a')
time.sleep(3)
button.click()

i = 0
while i <= 1000:  # download ~1000 captchas
    time.sleep(2)
    page = br.page_source
    # Locate the captcha <img> element and read its src attribute
    # (on this page it is typically a base64 data: URI).
    html = etree.HTML(page)
    img = html.xpath('//*[@id="J-loginImg"]')[0]
    img_url = img.attrib['src']
    # Fetch the image bytes.  Use the public read() API instead of the
    # undocumented internal .file attribute of the response object.
    respond = urlopen(img_url)
    print('done')
    img_bytes = respond.read()
    # Persist the raw JPEG bytes.
    with open(os.path.join(file_dir, 'name_1_%d.jpg' % i), 'wb') as f:
        f.write(img_bytes)
    # Tiny cosmetic progress bar.
    for m in range(20):
        print('*', end='')
        time.sleep(0.01)
    print('完成第%d張圖片下載' % (i + 1))
    # Click the refresh control so the next round gets a new captcha.
    fresh = br.find_element_by_xpath('/html/body/div[2]/div[2]/div[1]/div[2]/div[3]/div/div[3]')
    time.sleep(5)
    fresh.click()
    i += 1
到目前為止,驗證碼基本獲取完畢,結(jié)果如下。我們當時為了得到更好的結(jié)果,爬取了3萬多張,但到後面識別準確率也只能達到90%左右,說明數(shù)據(jù)量還是不夠。
2欣簇、圖片剪切规脸,將圖片剪切為指定大小,供于CNN模型學習
# Crop each captcha into one text patch and eight candidate-image tiles.
import os
from PIL import Image
import time

char_dir = r'E:\12306_cha'   # output dir for the text (phrase) part
pic_dir = r'E:\12306_pic'    # output dir for the candidate tiles
ticket_dir = r'E:\12306'     # dir containing the downloaded captchas
# Create the output directories on first run.
for d in (char_dir, pic_dir):
    if not os.path.exists(d):
        os.mkdir(d)

# Pixel boxes of the eight 69x70 tiles, keyed by their "row_col" suffix.
TILE_BOXES = {
    '1_1': (3, 39, 72, 109),
    '1_2': (75, 39, 144, 109),
    '1_3': (147, 39, 216, 109),
    '1_4': (219, 39, 288, 109),
    '2_1': (3, 110, 72, 180),
    '2_2': (75, 110, 144, 180),
    '2_3': (147, 110, 216, 180),
    '2_4': (219, 110, 288, 180),
}

pic_list = os.listdir(ticket_dir)
for num, name in enumerate(pic_list):
    try:
        image = Image.open(os.path.join(ticket_dir, name))
        stem = name.replace('.jpg', '')
        # The phrase text sits in a fixed strip at the top of the captcha.
        image.crop((117, 0, 230, 26)).save(os.path.join(char_dir, '%s.jpg' % stem))
        # Eight candidate tiles arranged in a 2x4 grid.
        for suffix, box in TILE_BOXES.items():
            image.crop(box).save(os.path.join(pic_dir, '%s_%s.jpg' % (stem, suffix)))
        # Tiny cosmetic progress bar.
        for n in range(30):
            print('*', end='')
            time.sleep(0.02)
    except Exception:  # corrupt/unreadable image: skip it (was a bare except)
        print('image error')
        continue
    print('第%d張圖片已經(jīng)處理,還剩%d張' % (num + 1, len(pic_list) - num - 1))
print('Having done all the pictures')
到這一步,一張驗證碼圖片就被分為1張文字圖片和8張圖形圖片,分別保存在各自的文件夾中,效果如下:
3、接下來一步,就是最讓人自閉的打標簽:將類別一樣的圖片整理到一個文件夾中。我人已經(jīng)分傻了,能體會從3萬張圖片中找圖片的痛苦嗎?不過還是感謝達納同學分好的小部分數(shù)據(jù)集,正是這部分數(shù)據(jù)集用於機器學習,才使得後面3萬張被成功分類。效果圖如下:
4、將圖片做合適的處理等浊,喂給CNN模型進行學習,模型搭建如下:
首先訓練文字部分
#文字部分CNN網(wǎng)絡(luò)
#原模型修改
# 導入所需模塊
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.initializers import TruncatedNormal
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K
class SimpleVGGNet:
    """VGG-style CNN used for the captcha phrase (text) classifier."""

    @staticmethod
    def build(width, height, depth, classes):
        """Assemble and return an uncompiled Keras Sequential model.

        width/height/depth describe the input image; classes is the
        number of output categories.
        """
        model = Sequential()
        input_shape = (height, width, depth)
        bn_axis = -1
        if K.image_data_format() == "channels_first":
            input_shape = (depth, height, width)
            bn_axis = 1

        # All conv/dense layers share the same truncated-normal init.
        init = TruncatedNormal(mean=0.0, stddev=0.01)

        def conv_bn(filters, **extra):
            # One CONV => RELU => BN unit.
            model.add(Conv2D(filters, (3, 3), padding="same",
                             kernel_initializer=init, **extra))
            model.add(Activation("relu"))
            model.add(BatchNormalization(axis=bn_axis))

        # Stage 1: CONV => RELU => POOL
        conv_bn(32, input_shape=input_shape)
        model.add(MaxPooling2D(pool_size=(3, 3)))

        # Stage 2: (CONV => RELU) * 2 => POOL
        conv_bn(64)
        conv_bn(64)
        model.add(MaxPooling2D(pool_size=(2, 2)))

        # Stage 3: (CONV => RELU) * 3 => POOL
        conv_bn(128)
        conv_bn(128)
        conv_bn(128)
        model.add(MaxPooling2D(pool_size=(2, 2)))

        # Fully-connected head.
        model.add(Flatten())
        model.add(Dense(1024, kernel_initializer=init))
        model.add(Activation("relu"))
        model.add(BatchNormalization())

        # Softmax classifier.
        model.add(Dense(classes, kernel_initializer=init))
        model.add(Activation("softmax"))
        return model
CNN網(wǎng)絡(luò)搭建完畢
# 導入所需工具包
from CNN_net import SimpleVGGNet
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
import utils_paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
import PIL.Image as Image
# ---- Train the phrase-text classifier ----
print("------開始讀取數(shù)據(jù)------")
data = []
labels = []
# Collect every image path under path_1 (the directory of cropped text
# patches, one sub-directory per class).
# NOTE(review): path_1 is a placeholder the reader must define.
imagePaths = sorted(list(utils_paths.list_images(path_1)))
random.seed(42)            # deterministic shuffle => reproducible split
random.shuffle(imagePaths)
for imagePath in imagePaths:
    image = Image.open(imagePath)
    image = np.array(image)
    image = cv2.resize(image, (96, 96))  # model input is 96x96x3
    data.append(image)
    # The class label is the name of the parent directory.
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)
# Scale pixel values to [0, 1].
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
# Train/test split.
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)
# One-hot encode the labels.
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)
# Data augmentation.
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
    horizontal_flip=True, fill_mode="nearest")
# Build the CNN.
model = SimpleVGGNet.build(width=96, height=96, depth=3, classes=len(lb.classes_))
# Hyper-parameters.
INIT_LR = 0.001
EPOCHS = 100
BS = 32
print("------準備訓練網(wǎng)絡(luò)------")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
# Train with the augmented generator.
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY), steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS)
# Evaluate on the held-out split.
print("------測試網(wǎng)絡(luò)------")
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
    predictions.argmax(axis=1), target_names=lb.classes_))
# Plot the training curves.
N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.plot(N, H.history["accuracy"], label="train_acc")
plt.plot(N, H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.savefig(r'path_2\cnn_plot.png')  # path_2: where to save the curve
# Persist the model and the fitted label binarizer.
print("------正在保存模型------")
model.save(r'path_3\cnn.model')      # path_3: model save path
# Context manager ensures the pickle file is always closed.
with open(r'path_4\cnn_lb.pickle', "wb") as f:  # path_4: label-set path
    f.write(pickle.dumps(lb))
模型開始進行訓練
可以看到最後的(驗證)準確率很低,但由於我將Dropout層去除,模型過擬合,導致我得到的結(jié)果雖然準確率低,識別效果卻很好(比我的圖片識別效果還好)。
圖形部分CNN模型
#原模型修改
# 導入所需模塊
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.initializers import TruncatedNormal
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K
class SimpleVGGNet:
    """VGG-style CNN used for the captcha picture-tile classifier."""

    @staticmethod
    def build(width, height, depth, classes):
        """Assemble and return an uncompiled Keras Sequential model.

        width/height/depth describe the input image; classes is the
        number of output categories.
        """
        model = Sequential()
        input_shape = (height, width, depth)
        bn_axis = -1
        if K.image_data_format() == "channels_first":
            input_shape = (depth, height, width)
            bn_axis = 1

        # All conv/dense layers share the same truncated-normal init.
        init = TruncatedNormal(mean=0.0, stddev=0.01)

        def conv_bn(filters, **extra):
            # One CONV => RELU => BN unit.
            model.add(Conv2D(filters, (3, 3), padding="same",
                             kernel_initializer=init, **extra))
            model.add(Activation("relu"))
            model.add(BatchNormalization(axis=bn_axis))

        # Stage 1: CONV => RELU => POOL, dropout for regularisation.
        conv_bn(32, input_shape=input_shape)
        model.add(MaxPooling2D(pool_size=(3, 3)))
        model.add(Dropout(0.25))

        # Stage 2: (CONV => RELU) * 2 => POOL
        conv_bn(64)
        conv_bn(64)
        model.add(MaxPooling2D(pool_size=(3, 3)))
        model.add(Dropout(0.25))

        # Stage 3: (CONV => RELU) * 3 => POOL
        conv_bn(128)
        conv_bn(128)
        conv_bn(128)
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))

        # Fully-connected head.
        model.add(Flatten())
        model.add(Dense(1024, kernel_initializer=init))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.25))

        # Softmax classifier.
        model.add(Dense(classes, kernel_initializer=init))
        model.add(Activation("softmax"))
        return model
CNN模型搭建完畢
開始對圖形部分進行CNN網(wǎng)絡(luò)訓練
# 導入所需工具包
from CNN_net import SimpleVGGNet
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
import utils_paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
import PIL.Image as Image
# ---- Train the picture-tile classifier ----
print("------開始讀取數(shù)據(jù)------")
data = []
labels = []
# Collect every image path under path_1 (the directory of labelled
# tiles, one sub-directory per class).
# BUG FIX: the original split this assignment across two lines with no
# continuation, which is a SyntaxError.
# NOTE(review): path_1 is a placeholder the reader must define.
imagePaths = sorted(list(utils_paths.list_images(path_1)))
random.seed(42)            # deterministic shuffle => reproducible split
random.shuffle(imagePaths)
for imagePath in imagePaths:
    image = Image.open(imagePath)
    image = np.array(image)
    image = cv2.resize(image, (80, 80))  # model input is 80x80x3
    data.append(image)
    # The class label is the name of the parent directory.
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)
# Scale pixel values to [0, 1].
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
# Train/test split.
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)
# One-hot encode the labels.
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)
# Data augmentation.
aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
    horizontal_flip=True, fill_mode="nearest")
# Build the CNN.
model = SimpleVGGNet.build(width=80, height=80, depth=3, classes=len(lb.classes_))
# Hyper-parameters.
INIT_LR = 0.001
EPOCHS = 100
BS = 32
print("------準備訓練網(wǎng)絡(luò)------")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
# Train with the augmented generator.
H = model.fit_generator(aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY), steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS)
# Evaluate on the held-out split.
print("------測試網(wǎng)絡(luò)------")
predictions = model.predict(testX, batch_size=32)
print(classification_report(testY.argmax(axis=1),
    predictions.argmax(axis=1), target_names=lb.classes_))
# Plot the training curves.
N = np.arange(0, EPOCHS)
plt.style.use("ggplot")
plt.figure()
plt.plot(N, H.history["loss"], label="train_loss")
plt.plot(N, H.history["val_loss"], label="val_loss")
plt.plot(N, H.history["accuracy"], label="train_acc")
plt.plot(N, H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend()
plt.savefig(r'path_2\cnn_plot.png')  # path_2: where to save the curve
# Persist the model and the fitted label binarizer.
print("------正在保存模型------")
model.save(r'path_3\cnn.model')      # path_3: model save path
# Context manager ensures the pickle file is always closed.
with open(r'path_4\cnn_lb.pickle', "wb") as f:  # path_4: label-set path
    f.write(pickle.dumps(lb))
可以看到最後的準確率達到了92%,但我覺得效果還只是一般,還是會有點智障,比如把菠蘿識別成啤酒、紅豆識別成紅棗。沒辦法,要想準確率更高,必須要有更大的數(shù)據(jù)集,其次要對圖片進行合適的處理(我還不是太會)。
4.5孩哑、這里要提一下啊栓霜,我們的做法稍微巧了一點,我們先讓機器對1500張左右的圖片進行學習横蜒,準確率可以達到80%左右胳蛮。然后我們讓這種程度的機器對圖片進行分類销凑,最終分完了30000張圖片(當然我們最后人工進行挑錯了),如下是分類的代碼:
# 導入所需工具包
from keras.models import load_model
import argparse
import pickle
import cv2
import PIL.Image as Image
import os
import shutil
import numpy as np
print("------讀取模型和標簽------")
# NOTE(review): `path` / `path_1` below are placeholders the reader must
# fill in.  The original reused the name `path` for both the model file
# and the picture directory; distinct names avoid the collision.
model = load_model(model_path)            # model_path: saved model file
# Close the label file promptly instead of leaking the handle.
with open(lb_path, "rb") as lb_file:      # lb_path: saved LabelBinarizer
    lb = pickle.loads(lb_file.read())

def get_piclist(path):
    """Return the file names inside *path*."""
    return os.listdir(path)

path = r'E:\12306_pic'
pic_list = get_piclist(path)  # get picture list
for fname in pic_list:
    img_path = path + '\\' + fname
    # Preprocess exactly as during training: 80x80, scaled to [0, 1],
    # with a leading batch dimension.
    image = np.array(Image.open(img_path))
    image = cv2.resize(image, (80, 80))
    image = image.astype("float") / 255.0
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    preds = model.predict(image)
    j = preds.argmax(axis=1)[0]
    label = lb.classes_[j]
    accuracy = int(preds[0][j] * 100)
    print(label + '===>' + str(accuracy))
    # Move the tile into the predicted class folder when the model is
    # at least minimally confident.
    if accuracy > 10:
        shutil.move(img_path, r'Profile' + '\\' + '%s' % label)
5仅炊、進行到這里斗幼,基本上絕大部分工作已經(jīng)做完了,后面的就是把前面的結(jié)果和Selenium結(jié)合實現(xiàn)自動化搶票,代碼如下:
# Clock helper.
import datetime


def get_time():
    """Return the current local time as a datetime object."""
    return datetime.datetime.now()
#發(fā)送郵件的函數(shù)茂洒,以163郵箱為端口
#獲取你的郵箱
# E-mail notification via the 163 SMTP service.
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr

my_sender = 'user'      # sender account
my_pass = 'password'    # sender password (163 SMTP auth code)


def mail(address):
    """Send a 'pay for your ticket' notice to *address*.

    Returns True on success, False if sending raised.
    BUG FIX: the original computed this flag but returned None.
    """
    my_user = address
    ret = True
    try:
        # BUG FIX: the original message text was mojibake-corrupted.
        msg = MIMEText('小主,快來12306官網(wǎng)支付您的車票嘍!!!', 'plain', 'utf-8')
        msg['From'] = formataddr(["搶票小助手", my_sender])   # sender nickname + account
        msg['To'] = formataddr(["FK", my_user])               # recipient nickname + account
        msg['Subject'] = "12306提醒"                          # mail subject
        server = smtplib.SMTP_SSL("smtp.163.com", 465)        # 163 SMTP over SSL
        server.login(my_sender, my_pass)
        server.sendmail(my_sender, [my_user, ], msg.as_string())
        server.quit()
    except Exception:
        ret = False
    return ret
#獲取用戶的輸入12306賬號和密碼盒卸,并存儲為CSV文件以及自己的郵箱
# Read (or prompt for and cache) the user's 12306 credentials.
import csv
import pandas as pd
import os


def get_user(path=r'..\User_information\user_information.csv'):
    """Return (user_name, user_secret) as (int, str).

    Credentials are cached in the CSV at *path*; on first run the user
    is prompted and the answers are written there.  *path* defaults to
    the original hard-coded location, so existing callers are unchanged.
    """
    if os.path.exists(path):
        # Read the single stored row back (the original abused the
        # pandas header row for this; plain csv is more direct).
        with open(path, newline='') as f:
            row = next(csv.reader(f))
        user_name = int(row[0])
        user_secret = row[1]
    else:
        user_name = int(input("Please input your User Name: "))
        user_secret = input("please input your User Secret: ")
        # BUG FIX: csv requires newline='' (newline=None writes extra
        # blank lines on Windows).
        with open(path, 'w', newline='') as f:
            csv.writer(f).writerow([user_name, user_secret])
    return (user_name, user_secret)
#圖片識別部分函數(shù)
from keras.models import load_model
import argparse
import pickle
import cv2
import PIL.Image as Image
import os
import shutil
import numpy as np
print("------讀取模型和標簽------")
# Load the tile (picture) classifier and the phrase (char) classifier
# together with their fitted LabelBinarizers.  Paths are relative to
# the script's working directory.  Context managers close the pickle
# files instead of leaking the handles as the original did.
model_p = load_model(r'..\pic_train\cnn.model')
with open(r'..\pic_train\cnn_lb.pickle', "rb") as _f:
    lb_p = pickle.loads(_f.read())
model_c = load_model(r'..\char_train\cnn.model')
with open(r'..\char_train\cnn_lb.pickle', "rb") as _f:
    lb_c = pickle.loads(_f.read())
def pic_identify(image):
    """Classify one candidate tile array; return its predicted label."""
    # Preprocess exactly as during training: 80x80, [0, 1], batch of 1.
    tile = cv2.resize(image, (80, 80)).astype("float") / 255.0
    batch = tile.reshape((1,) + tile.shape)
    scores = model_p.predict(batch)
    best = scores.argmax(axis=1)[0]
    return lb_p.classes_[best]
def char_identify(charc):
    """Classify the phrase strip array; return the predicted phrase."""
    # Preprocess exactly as during training: 96x96, [0, 1], batch of 1.
    strip = cv2.resize(charc, (96, 96)).astype("float") / 255.0
    batch = strip.reshape((1,) + strip.shape)
    scores = model_c.predict(batch)
    best = scores.argmax(axis=1)[0]
    return lb_c.classes_[best]
#登錄12306官網(wǎng)函數(shù)
import selenium.webdriver as wb
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait as WDW
from selenium.webdriver.common.by import By
import time
from lxml import etree
from urllib.request import urlopen
import PIL.Image as Image
import numpy as np
from selenium.webdriver.common.action_chains import ActionChains
def login(user_name, user_secret, br):
    """Open the 12306 login page in *br* and fill in the credentials.

    Leaves the browser on the account-login form, maximized; the
    captcha still has to be solved by the caller.
    """
    wait = WDW(br, 10)
    br.get('https://kyfw.12306.cn/otn/resources/login.html')
    # Switch to the account-login tab once it is present.
    account_tab = wait.until(EC.presence_of_element_located(
        (By.XPATH, '/html/body/div[2]/div[2]/ul/li[2]/a')))
    time.sleep(5)
    account_tab.click()
    time.sleep(5)
    br.find_element_by_id('J-userName').send_keys(user_name)
    br.find_element_by_id('J-password').send_keys(user_secret)
    br.maximize_window()
def pic_get(br):
    """Download the captcha currently shown in *br* to ..\\download_pic\\12306.jpg."""
    file_dir = r'..\download_pic'
    page = br.page_source
    # Locate the captcha <img> and read its src (typically a data: URI).
    html = etree.HTML(page)
    img = html.xpath('//*[@id="J-loginImg"]')[0]
    img_url = img.attrib['src']
    respond = urlopen(img_url)
    # BUG FIX: use the public read() API instead of the undocumented
    # internal .file attribute of the response object.
    img_bytes = respond.read()
    with open(file_dir + '\\12306.jpg', 'wb') as f:
        f.write(img_bytes)
def pic_cut():
    """Slice the downloaded captcha into its parts.

    Returns ([8 tile arrays], [8 click coordinates], phrase array),
    tiles in row-major order matching their coordinates.
    NOTE(review): the screen coordinates assume a maximized browser
    window at the author's resolution — verify before reuse.
    """
    pic_path = r'..\download_pic\12306.jpg'
    image = Image.open(pic_path)
    charc = np.array(image.crop((117, 0, 230, 26)))  # phrase strip
    # (crop box, screen click coordinate) for each of the 8 tiles.
    tiles = [
        ((3, 39, 72, 109), (1050, 350)),
        ((75, 39, 144, 109), (1130, 350)),
        ((147, 39, 216, 109), (1200, 350)),
        ((219, 39, 288, 109), (1270, 350)),
        ((3, 110, 72, 180), (1050, 420)),
        ((75, 110, 144, 180), (1130, 420)),
        ((147, 110, 216, 180), (1200, 420)),
        ((219, 110, 288, 180), (1270, 420)),
    ]
    pics = [np.array(image.crop(box)) for box, _ in tiles]
    locations = [loc for _, loc in tiles]
    return (pics, locations, charc)
def mouse_click(x, y, br):
    """Click at offset (x, y) from the current pointer, then move back."""
    actions = ActionChains(br)
    actions.move_by_offset(x, y).click().perform()
    # Return the pointer so the next click's offset starts from origin.
    ActionChains(br).move_by_offset(-x, -y).perform()
#開始搶票,并將上面所有提到的函數(shù)集中到此模塊中
from get_information import get_user as gu
from login_12306 import *
from identify import *
import time
import datetime
from clock import *
from tqdm import tqdm
from selenium.webdriver.common.keys import Keys
from email_notion import mail
def main():
    # NOTE(review): the original source lost its indentation when it was
    # pasted into the blog; the structure below is a reconstruction —
    # in particular, the booking steps after the manual verification are
    # assumed to run unconditionally.  Confirm against the author's code.
    # Gather login credentials and the notification address.
    username,password=gu()
    address=input('請輸入您的電子郵箱: ')
    # Launch the browser.
    br=wb.Chrome()
    # Log in to 12306.
    login(username,password,br)
    # Wait for the page to settle.
    time.sleep(5)
    # Download the captcha image.
    pic_get(br)
    # Slice it into tiles, click coordinates and the phrase strip.
    piclist,location,chara=pic_cut()
    # Recognise the phrase text.
    chara_content=char_identify(chara)
    # Refresh until the phrase is not one the model handles badly.
    while chara_content=='卷尺' or chara_content=='鍋鏟' or chara_content=='海報' or chara_content=='珊瑚'or chara_content=='棉棒':
        fresh=br.find_element_by_xpath('/html/body/div[2]/div[2]/div[1]/div[2]/div[3]/div/div[3]')
        fresh.click()
        pic_get(br)
        time.sleep(10)
        piclist,location,chara=pic_cut()
        time.sleep(5)
        chara_content=char_identify(chara)
    # Click every tile whose predicted label matches the phrase.
    for i,j in zip(piclist,location):
        if pic_identify(i)==chara_content:
            print(pic_identify(i))
            mouse_click(j[0],j[1],br)
    login_button=br.find_element_by_id('J-login')
    # Manual confirmation that the tiles were clicked correctly.
    verify=input('How about the choice:')
    if verify=='y':
        login_button.click()
    # Ticket buying; one-way trips only.
    time.sleep(10)
    dan=br.find_element_by_xpath('//*[@id="J-chepiao"]/a')
    dan_v=br.find_element_by_xpath('//*[@id="J-chepiao"]/div/div[1]/ul/li[1]/a')
    dan.click()
    dan_v.click()
    # Enter departure, destination and travel date.
    fromstButton=br.find_element_by_id('fromStationText')
    tostButton=br.find_element_by_id('toStationText')
    departure=input("請輸入出發(fā)地:")
    destiney=input("請輸入目的地:")
    gotime=input('請輸入出發(fā)時間(eg.2019-12-3):')
    today=datetime.datetime.now()
    expect=datetime.datetime.strptime(gotime,'%Y-%m-%d')
    year=int(gotime.split('-')[0])
    month=int(gotime.split('-')[1])
    day=int(gotime.split('-')[2])
    fromstButton.click()
    fromstButton.send_keys(departure)
    fromstButton.send_keys(Keys.ENTER)
    time.sleep(1)
    tostButton.click()
    tostButton.send_keys(destiney)
    tostButton.send_keys(Keys.ENTER)
    # Open the date picker and grab the day cells of both visible months.
    date=br.find_element_by_id('date_icon_1').click()
    tomonth=br.find_elements_by_xpath('/html/body/div[34]/div[1]/div[2]/div')
    nextmonth=br.find_elements_by_xpath('/html/body/div[34]/div[2]/div[2]/div')
    #date=br.find_element_by_id('date_icon_1')
    # Tickets open 30 days ahead: sleep an hour at a time until then.
    while expect>today+datetime.timedelta(days=30):
        for i in tqdm(range(3600)):
            time.sleep(1)
        today=get_time()
    # Pick the day cell from whichever month panel applies.
    if time.localtime().tm_mon==month:
        choice=tomonth[day-1].click()
    else:
        choice=nextmonth[day-1].click()
    # Query all train types.
    all_button=br.find_element_by_id('train_type_btn_all').click()
    search_button=br.find_element_by_id('query_ticket').click()
    # Only the first listed train is handled, as an example.
    time.sleep(5)
    train_info=etree.HTML(br.page_source)
    # Origin/destination stations of the train.
    place=train_info.xpath('//*[@id="train_num_0"]/div[2]/strong')
    # Departure/arrival times.
    getime=train_info.xpath('//*[@id="train_num_0"]/div[3]/strong')
    trainId=train_info.xpath('//*[@id="queryLeftTable"]/tr[1]')[0].attrib['id']
    print("已為您查詢到可依靠的列車")
    print('%s ===>> %s %s ===>> %s'%(place[0].text,place[1].text,getime[0].text,getime[1].text))
    # Click the booking link for that train.
    preorder=br.find_element_by_xpath('//*[@id="%s"]/td[13]/a'%trainId)
    preorder.click()
    time.sleep(2)
    # Pick the first saved passenger.
    passenger=br.find_element_by_id('normalPassenger_0')
    passenger.click()
    # Submit the order.
    submit=br.find_element_by_id('submitOrder_id').click()
    time.sleep(2)
    # Confirm the seat dialog.
    sure=br.find_element_by_id('qr_submit_id').click()
    # Notify the user by e-mail that payment is pending.
    mail(address)
    return br
main()
哎,由于我作業(yè)纏身幸撕,有一部分代碼的注釋還沒有寫完薇组,等到閑下來會一一注釋清楚,效果圖額(主要是我忘了存坐儿,再加上調(diào)試代碼時一天購票三次律胀,已經(jīng)沒機會買票了,就看最終結(jié)果吧
這就是本篇Blog的主要內(nèi)容貌矿,沒辦法時間太緊累铅,作業(yè)太多,所以有不當之處還請指正站叼,歡迎大家交流學習啊。所有文件程序都已保存到百度網(wǎng)盤 lsbk