前段時間為了ICP證書披坏,寫了一個以node.js為后臺阳懂,vue為前臺頁面的小項目。眾所周知在小的項目它也要數(shù)據(jù)來支撐替劈,這不是被逼著搗鼓如何快速的導(dǎo)入數(shù)據(jù)到數(shù)據(jù)庫,我這種方法不一定是最好的得滤,但是在現(xiàn)階段我覺的是最有意思的陨献。
項目比較簡單,大佬勿噴懂更,第一次用python寫東西多多包涵眨业!有錯誤的地方希望大神指點急膀。
簡單的介紹一下使用到庫
- pymysql:用來鏈接操作數(shù)據(jù)庫
- xlwt :導(dǎo)出xls文件
- pandas:數(shù)據(jù)清理,及插入(非常強大的一個庫)
話不多說直接上代碼
#!/usr/bin/python3
import pymysql
import requests
import json
import xlwt
import os
import shutil
import pandas as pd
import qtui.dao.path as path
from sqlalchemy import create_engine
class DatabaseAccess():
def __init__(self) -> None:
print("初始成功龄捡,數(shù)據(jù)庫連接信息")
# self.isConnectionOpen()
self.__db_host = "localhost"
self.__db_port = 3306
self.__db_user = "root"
self.__db_password = "123456"
self.__db_database = "storedb"
# 爬取數(shù)據(jù)
def getdataforurl(self):
print("爬取數(shù)據(jù)卓嫂,自己寫吧,我扒的就不展示了")
# 鏈接數(shù)據(jù)庫
def isConnectionOpen(self):
self.__db = pymysql.connect(
host=self.__db_host,
port=self.__db_port,
user=self.__db_user,
password=self.__db_password,
database=self.__db_database,
charset='utf8'
)
# 插入數(shù)據(jù)
def linesinsert(self, data):
try:
# 連接數(shù)據(jù)庫
self.isConnectionOpen()
# 使用 cursor() 方法創(chuàng)建一個游標對象 cursor
cursor = self.__db.cursor()
# 插入數(shù)據(jù)語句 簡單的sql語句 插入數(shù)據(jù)庫(后面用pandas好像更方便在handle_data方法中)
# 軟件供應(yīng)
# query = """insert into software (id, software_introduction, applicable_industries,category,contacts,display_title,mail_box,qq,software_price,team_city,team_joining_time,team_logo,team_name,telephone,vps_table_name) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
# values = (data["id"], data["software_introduction"], data["applicable_industries"], data["category"], data["contacts"], data["display_title"], data["mail_box"],
# data["qq"], data["software_price"], data["team_city"], data["team_joining_time"], data["team_logo"], data["team_name"], data["telephone"], data["vps_table_name"])
# 找團隊
# query = """insert into team (id, case_work, city,contacts,display_title,image,joining_time,mail_box,qq,specialty,team_introduction,telephone,vps_table_name,name_of_employer) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
# values = (data["id"], data["case_work"], data["city"], data["contacts"], data["display_title"], data["image"], data["joining_time"], data["mail_box"],
# data["qq"], data["specialty"], data["team_introduction"], data["telephone"], data["vps_table_name"], "Sky666")
# 找項目
query = """insert into project (id, city,display_title,deadline,development_cycle,project_budget,employer_joining_time,end_of_bidding,item_no,project_classification,project_introduction,project_status,release_time,vps_table_name,image_header,name_of_employer) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
values = (data["id"], data["city"], data["display_title"], data["deadline"], data["development_cycle"], data["project_budget"],
data["employer_joining_time"], data["end_of_bidding"], data["item_no"], data["project_classification"], data["project_introduction"], data["project_status"], data["release_time"], data["vps_table_name"], "http://www.hfwish.com/theme/wrf_wbfb_v2/images/1-191031162301Y9.jpg", "Sky666")
cursor.execute(query, values)
except Exception as e:
print(e)
finally:
# 關(guān)閉數(shù)據(jù)庫連接
cursor.close()
self.__db.commit()
self.__db.close()
# 查詢所有表的名稱
def check_table_name(self):
print("查詢表名中....")
results = []
try:
self.isConnectionOpen()
with self.__db.cursor() as cursor:
sql = '''SHOW TABLES'''
cursor.execute(sql)
result = cursor.fetchall()
for i in range(len(result)):
results.append(result[i][0])
except Exception as e:
print(e)
finally:
# 關(guān)閉數(shù)據(jù)庫連接
cursor.close()
self.__db.commit()
self.__db.close()
print("查詢成功==========================")
return results
# 根據(jù)表名導(dǎo)出示例模板
def export(self, table_name):
print("導(dǎo)出模板")
try:
# 連接數(shù)據(jù)庫
self.isConnectionOpen()
# 使用 cursor() 方法創(chuàng)建一個游標對象 cursor
cursor = self.__db.cursor()
sql = 'select * from %s;' % table_name
cursor.execute(sql) # 執(zhí)行sql
fileds = [filed[0] for filed in cursor.description] # 所有的字段
all_data = cursor.fetchall()
book = xlwt.Workbook(encoding='utf-8')
# 背景色--淡綠色
patternLightGreen = xlwt.Pattern()
patternLightGreen.pattern = xlwt.Pattern.SOLID_PATTERN
patternLightGreen.pattern_fore_colour = 42
styleLightGreen = xlwt.XFStyle()
styleLightGreen.pattern = patternLightGreen
sheet = book.add_sheet(table_name)
for col, filed in enumerate(fileds): # 寫表頭的聘殖,
sheet.write(0, col, filed, styleLightGreen) # xls表頭
# print(all_data)
if len(all_data) > 0:
row = 1 # 第一行
for col, filed in enumerate(all_data[len(all_data)-1]): # 控制列
sheet.write(row, col, filed)
book.save('%s模板.xls' % table_name)
bPath = os.getcwd()
# 獲取當前文件路徑
file_path = os.path.join(bPath, '%s模板.xls' %
table_name)
# 移動文件到E盤地方
target_path = os.path.join(path.GetDir.get_BASE_DIR3("dataHook"))
# 使用shutil包的move方法移動文件
shutil.move(file_path, target_path)
print("導(dǎo)出%s表模板成功晨雳!" % table_name,
"------------------------------------------文件路勁:E:\ICP\ICP-Date\dataHook")
finally:
# 關(guān)閉數(shù)據(jù)庫連接
cursor.close()
self.__db.commit()
self.__db.close()
# 根據(jù)表名導(dǎo)出所有數(shù)據(jù)
def exportAll(self, table_name):
print("導(dǎo)出表結(jié)構(gòu)")
try:
# 連接數(shù)據(jù)庫
self.isConnectionOpen()
# 使用 cursor() 方法創(chuàng)建一個游標對象 cursor
cursor = self.__db.cursor()
sql = 'select * from %s;' % table_name
cursor.execute(sql) # 執(zhí)行sql
fileds = [filed[0] for filed in cursor.description] # 所有的字段
all_data = cursor.fetchall()
book = xlwt.Workbook(encoding='utf-8')
# 背景色--淡綠色
patternLightGreen = xlwt.Pattern()
patternLightGreen.pattern = xlwt.Pattern.SOLID_PATTERN
patternLightGreen.pattern_fore_colour = 42
styleLightGreen = xlwt.XFStyle()
styleLightGreen.pattern = patternLightGreen
sheet = book.add_sheet('sheet1')
for col, filed in enumerate(fileds): # 寫表頭的,
sheet.write(0, col, filed, styleLightGreen) # xls表頭
# print(all_data)
row = 1 # 第一行
for data in all_data: # 行
for col, filed in enumerate(data): # 控制列
sheet.write(row, col, filed)
row += 1 # 每次寫完一行就斤,行數(shù)加一
# print("導(dǎo)出%s表成功悍募!" % table_name,
# "------------------------------------------")
book.save('%s.xls' % table_name)
bPath = os.getcwd()
# 獲取當前文件路徑
file_path = os.path.join(bPath, '%s.xls' % table_name)
# 移動文件到E盤地方
target_path = os.path.join(path.GetDir.get_BASE_DIR3("dataHook"))
# 使用shutil包的move方法移動文件
shutil.move(file_path, target_path)
print("導(dǎo)出%s表成功!" % table_name,
"------------------------------------------文件路勁:E:\ICP\ICP-Date\dataHook")
finally:
# 關(guān)閉數(shù)據(jù)庫連接
cursor.close()
self.__db.commit()
self.__db.close()
# 查詢數(shù)據(jù)
def check_date(self):
# 重新建立數(shù)據(jù)庫連接
self.isConnectionOpen()
cursor = self.__db.cursor()
# 查詢數(shù)據(jù)庫并打印內(nèi)容
cursor.execute('''select * from catering_sale''')
results = cursor.fetchall()
for row in results:
print(row)
# 關(guān)閉
cursor.close()
self.__db.commit()
self.__db.close()
# 讀取xls
def read_excl(self, table_name):
print("開始讀取文件")
allFile = []
try:
allFile = path.GetDir.file_name(
path.GetDir.get_BASE_DIR3("inserData"), table_name+"模板")
except Exception as e:
print("讀取文件報錯==========================")
print(e)
finally:
if len(allFile) == 0:
print("未查詢到%s模板.xls文件" % table_name, "請檢查inserData文件夾")
return False
else:
print("所有%s.xls文件路徑" % table_name, allFile)
# select = input("查詢到多個文件洋机,請選擇第幾個? ")
return allFile[0]
# 讀取根據(jù)文件路徑處理數(shù)據(jù)
def handle_data(self, paths, name):
# 開始清理數(shù)據(jù)==============================
print("開始清理數(shù)據(jù)==============================path:", paths)
rPath = os.path.join(paths)
print("rPath", rPath)
df = pd.read_excel(rPath) # 使用pd讀取數(shù)據(jù)
# 清洗數(shù)據(jù): 沒有列頭, 缺失值洋魂,空行绷旗,重復(fù)數(shù)據(jù),非ASCII 字符(沒弄)副砍,
df.dropna(axis=0, how='any', inplace=True) # 刪除有空行
df.drop_duplicates("id", "first", inplace=True) # 刪除Id重復(fù)數(shù)據(jù)0
print(df.to_string())
print("清理數(shù)據(jù)完成===================================最終數(shù)據(jù)")
print("開始插入數(shù)據(jù)庫=================================START")
try:
self.isConnectionOpen()
db_info = {"host": self.__db_host,
"port": self.__db_port,
"user": self.__db_user,
"password": self.__db_password,
"database": self.__db_database,
"charset": 'utf8'}
engine = create_engine(
'mysql+pymysql://%(user)s:%(password)s@%(host)s:%(port)d/%(database)s?charset=utf8' % db_info, encoding='utf-8')
df.to_sql(name, con=engine,
if_exists='append', index=False)
target_path = os.path.join(path.GetDir.get_BASE_DIR3("finalData"))
target_path = target_path + "%s.csv" % name
df.to_csv(target_path, encoding='utf-8', index=False) # 寫成csv文件保存
except Exception as e:
print(e, "異常數(shù)據(jù)")
finally:
# 關(guān)閉數(shù)據(jù)庫連接
self.__db.commit()
self.__db.close()
print("插入數(shù)據(jù)結(jié)束")
if __name__ == "__main__":
# 創(chuàng)建實例化對象
db = DatabaseAccess()
# db.linesinsert()
# db.check_date()
# db.getdataforurl()
one = input("是否有匹配的模板數(shù)據(jù)表請輸入是/否:")
if one == '是':
table_name = db.check_table_name()
print("當前表:", table_name)
name = input("輸入你要導(dǎo)入的表名稱:")
paths = db.read_excl(name)
if paths == False:
print("-------------需要把對應(yīng)表的數(shù)據(jù).xls格式模板放入inserData文件夾下------------------------")
else:
db.handle_data(paths, name)
else:
table_name = db.check_table_name()
print("當前表:", table_name)
name = input("輸入你要導(dǎo)出模板的表名稱:")
db.export(name)
# table_name = db.check_table_name()
# print("當前表:", table_name)
# name = input("選擇你要導(dǎo)出的表名稱:")
# 根據(jù)表名導(dǎo)出示例模板
# db.export(name)
# 根據(jù)表名導(dǎo)出說有數(shù)據(jù)
# db.exportAll("team")
# 根據(jù)表名插入數(shù)據(jù)
# paths = db.read_excl("team")
# db.handle_data(paths, "team")
# f = open(r"e:/ICP/ICP-Date/dataHook/team.xls", 'rb')
# its_code = f.read()
# print(chardet.detect(its_code))
哭~~~ 后來產(chǎn)品說這個他沒環(huán)境怎么跑衔肢。沒辦法基于這個幾個簡單的功能,做了一版帶GUI的程序豁翎。
晚點把鏈接放出來角骤。。心剥。邦尊。。