# coding: utf-8
from pandas import read_excel
import pandas as pd
import os
import time
#df=read_excel('E:/R/Data/儲(chǔ)值卡金額異動(dòng)統(tǒng)計(jì)/11/2021-03.xlsx')
#df1.head()
# File extensions treated as Excel workbooks; matched case-insensitively.
EXCEL_SUFFIXES = ('.xls', '.xlsx', '.xlsm', '.xlsb')


def is_excel_workbook(fname):
    """Return True when *fname* looks like an Excel workbook worth reading.

    The name must END in one of ``EXCEL_SUFFIXES``; a substring test such as
    ``'xls' in fname`` would also accept names like ``xls_notes.txt``.
    Names starting with ``~$`` are rejected: Excel creates such lock files
    next to a workbook that is currently open, and they cannot be parsed.
    """
    if fname.startswith('~$'):
        return False
    return fname.lower().endswith(EXCEL_SUFFIXES)


def merge_excels(path):
    """Read every Excel workbook directly inside *path* and stack their rows.

    Workbooks are read in sorted file-name order so the row order of the
    result is reproducible (``os.listdir`` returns entries in arbitrary
    order).

    Args:
        path: Directory that contains the workbooks to merge.

    Returns:
        A DataFrame holding the rows of all workbooks, one after another.

    Raises:
        ValueError: if *path* contains no Excel workbook.
    """
    fnames = sorted(f for f in os.listdir(path) if is_excel_workbook(f))
    if not fnames:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(f"no Excel workbooks found in {path!r}")
    # os.path.join keeps working when `path` has no trailing separator.
    return pd.concat([read_excel(os.path.join(path, f)) for f in fnames])


if __name__ == "__main__":
    time_start = time.perf_counter()
    path = "D:/test/test數(shù)據(jù)/aa/"
    df = merge_excels(path)
    # The summary is written to the parent folder, so it is never picked up
    # as an input on the next run.
    df.to_excel('D:/test/test數(shù)據(jù)/儲(chǔ)值卡金額異動(dòng)匯總.xlsx', index=False)
    print("------完成------")
    time_end = time.perf_counter()
    print('time cost', time_end - time_start, 's')
    # A bare `df.head()` only displays something inside a notebook; print it
    # so the preview is also visible when run as a script.
    print(df.head())
# 2. Merge all Excel workbooks found under a directory
#!/usr/bin/python
# coding: utf-8
import pandas as pd
import os
import time
# Label of the column used to decide which rows are kept.
# NOTE(review): the label contains parenthesised romanisation fragments and
# looks mangled by a text conversion -- confirm it matches the real header.
TIME_COLUMN = '業(yè)務(wù)時(shí)間'

# Base name of the merged workbook; it is written into the input directory.
OUTPUT_NAME = 'new_file.xlsx'

# File extensions treated as Excel workbooks; matched case-insensitively.
MERGE_SUFFIXES = ('.xls', '.xlsx', '.xlsm', '.xlsb')


def is_mergeable_workbook(name, output_name=OUTPUT_NAME):
    """Return True when directory entry *name* should be read as an input.

    Rejected are: entries without an Excel extension, Excel ``~$`` lock
    files, and the merged output itself -- the output lives in the input
    directory, so without this check a second run would merge its own
    previous result back in.
    """
    lowered = name.lower()
    if name.startswith('~$') or lowered == output_name.lower():
        return False
    return lowered.endswith(MERGE_SUFFIXES)


def clean_merged_frame(frame):
    """Promote the embedded header row to column labels and drop junk rows.

    Steps, in order:
      1. use the row at position 1 as the column labels;
      2. drop rows that merely repeat the header (``TIME_COLUMN`` cell equal
         to the label itself);
      3. drop rows whose ``TIME_COLUMN`` cell is missing;
      4. renumber the index from 0.

    Args:
        frame: Concatenated raw sheets; left unmodified.

    Returns:
        A new, cleaned DataFrame.
    """
    # NOTE(review): the original comment said "first row", but position 1 is
    # the second data row -- kept as written; confirm against the sheets.
    # `inplace` is deliberately not passed: set_axis lost that parameter in
    # pandas 2.0 and returns a new object by default.
    cleaned = frame.set_axis(frame.iloc[1], axis=1)
    cleaned = cleaned[~cleaned[TIME_COLUMN].isin([TIME_COLUMN])]
    # Reassign instead of inplace=True: `cleaned` is a filtered slice here.
    cleaned = cleaned.dropna(subset=[TIME_COLUMN])
    return cleaned.reset_index(drop=True)


def merge_directory(directory, output_path):
    """Merge all workbooks in *directory*, clean them, write *output_path*.

    Workbooks are read in sorted file-name order so that the result (and the
    row picked as header by ``clean_merged_frame``) is reproducible.

    Args:
        directory: Folder containing the input workbooks.
        output_path: Destination of the merged workbook.

    Returns:
        The cleaned DataFrame that was written.

    Raises:
        ValueError: if *directory* contains no input workbook.
    """
    output_name = os.path.basename(output_path)
    names = sorted(
        name for name in os.listdir(directory)
        if is_mergeable_workbook(name, output_name)
    )
    if not names:
        raise ValueError(f"no Excel workbooks found in {directory!r}")
    frames = [pd.read_excel(os.path.join(directory, name)) for name in names]
    merged = clean_merged_frame(pd.concat(frames))
    merged.to_excel(output_path, index=False)
    return merged


if __name__ == "__main__":
    time_start = time.perf_counter()
    # Input directory.
    file_dir = "D:/test/test數(shù)據(jù)/aa/"
    # Path of the merged workbook (inside the input directory).
    new_filename = os.path.join(file_dir, OUTPUT_NAME)
    df2 = merge_directory(file_dir, new_filename)
    time_end = time.perf_counter()
    print('time cost', time_end - time_start, 's')
    # A bare `df2` only displays something inside a notebook.
    print(df2)