處理文件
- file_encoding(filepath) :小文件的時候調用,獲取文件的編碼,可以傳入open()函數encoding參數。
filepath是個文件
- file_encoding_big(filepath) :同上,區別是大文件調用,返回文件編碼。
filepath是個文件
- change_to_utf8(filepath) : 解決目錄下所有文件,亂碼的情況。不管是csv還是文本還是2進制文件。filepath是個文件夾
- clear_dir(filepath, save_dir=True):將目錄清空,save_dir=False表示這個目錄刪除
- file_backups(old_dir, new_dir): 目錄整體備份,從old_dir,備份到new_dir文件夾
# -*- coding: utf-8 -*-
"""
===========================
# @Time : 2020/8/5 16:39
# @File : handele_file.py
# @Author: adeng
# @Date : 2020/8/5
============================
"""
import os
import shutil
import tempfile
from collections import Counter
from datetime import datetime

import chardet
class HanldeFile():
    """Static helpers for encoding detection and bulk directory operations.

    Methods:
        file_encoding: detect the encoding of a small file (reads it whole).
        file_encoding_big: detect the encoding of a large file by sampling.
        change_to_utf8: re-encode every file under a directory to UTF-8.
        clear_dir: delete everything inside a directory tree.
        file_backups: copy a directory tree to another location.
    """

    @staticmethod
    def file_encoding(filepath):
        """Return the encoding chardet detects for a small file.

        The whole file is read into memory; use ``file_encoding_big`` for
        large files.

        Args:
            filepath: Path to a regular file (not a directory).

        Returns:
            The detected encoding name, suitable for ``open(encoding=...)``,
            or None when ``filepath`` is not a file or chardet reports no
            encoding.
        """
        if not os.path.isfile(filepath):
            print("這不是一個文件")
            return None
        with open(filepath, "rb") as fh:
            raw = fh.read()
        # chardet.detect returns
        # {'encoding': ..., 'confidence': ..., 'language': ...}.
        return chardet.detect(raw).get("encoding")

    @staticmethod
    def file_encoding_big(filepath, chunk_size=6000, max_chunks=50):
        """Return the most likely encoding of a large file by sampling it.

        Up to ``max_chunks`` chunks of ``chunk_size`` bytes are read from the
        start of the file, chardet is run on each chunk, and the encoding
        reported most often wins. Ties go to the encoding seen first.

        An empty file is deleted and recreated as an empty UTF-8 text file,
        and "utf-8" is returned for it.

        Args:
            filepath: Path to a regular file (not a directory).
            chunk_size: Number of bytes handed to chardet per sample.
            max_chunks: Maximum number of samples to read.

        Returns:
            The winning encoding name, "utf-8" for an empty file, or None
            when ``filepath`` is not a file or no chunk produced a guess.
        """
        if not os.path.isfile(filepath):
            print("這不是一個文件")
            return None

        votes = []
        file_is_empty = False
        with open(filepath, mode="rb") as fh:
            for index in range(max_chunks):
                chunk = fh.read(chunk_size)
                if not chunk:
                    # Hitting EOF on the very first read means no content.
                    file_is_empty = index == 0
                    break
                guess = chardet.detect(chunk).get("encoding")
                if guess:
                    votes.append(guess)

        if file_is_empty:
            print("此文件為空,刪除文件重新創建一個編碼為【utf-8】的同文件名")
            os.remove(filepath)
            with open(filepath, mode="w", encoding="utf-8"):
                pass
            return "utf-8"
        if not votes:
            return None
        # Counting per encoding (rather than keying a dict by the count)
        # keeps encodings with equal counts from overwriting each other.
        return Counter(votes).most_common(1)[0][0]

    @staticmethod
    def change_to_utf8(filepath):
        """Re-encode every file under a directory (recursively) to UTF-8.

        Files whose encoding cannot be detected, or whose bytes do not decode
        with the detected encoding (typically binary files), are left
        untouched.

        NOTE: a binary file that chardet misidentifies as a single-byte text
        encoding will still decode successfully and be re-encoded.

        Args:
            filepath: Path to a directory.

        Returns:
            None. Prints a message and returns when ``filepath`` is not a
            directory.
        """
        if not os.path.isdir(filepath):
            print(f"{filepath}這不是一個目錄")
            return
        for root, _dirs, files in os.walk(filepath):
            for name in files:
                HanldeFile._rewrite_as_utf8(root, name)

    @staticmethod
    def _rewrite_as_utf8(directory, name):
        """Convert one file to UTF-8 in place, or leave it as it is."""
        path = os.path.join(directory, name)
        encoding = HanldeFile.file_encoding_big(path)
        if not encoding:
            return
        with open(path, mode="rb") as src:
            raw = src.read()
        try:
            # Working on bytes (not text mode) keeps line endings intact.
            converted = raw.decode(encoding).encode("utf-8")
        except (LookupError, UnicodeError):
            # Unknown codec name or undecodable bytes: keep the original.
            return
        if converted == raw:
            return

        # Write to a uniquely named sibling, then swap it in atomically so a
        # failure part-way through never leaves a truncated original.
        fd, tmp_path = tempfile.mkstemp(dir=directory, prefix=name)
        try:
            with os.fdopen(fd, "wb") as dst:
                dst.write(converted)
            # mkstemp creates the file owner-only; restore the original mode.
            shutil.copymode(path, tmp_path)
            os.replace(tmp_path, path)
        except Exception:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
            raise

    @staticmethod
    def clear_dir(filepath, save_dir=True):
        """Delete the contents of a directory tree.

        Args:
            filepath: Path to a directory.
            save_dir: When True (default) only files are deleted and the
                directory structure is kept. When False the sub-directories
                and ``filepath`` itself are removed as well.

        Returns:
            None. Prints a message and returns when ``filepath`` is not a
            directory.
        """
        if not os.path.isdir(filepath):
            print(f"{filepath}這不是一個目錄")
            return

        # Walk bottom-up so every directory is already empty by the time it
        # is removed. os.rmdir (unlike os.removedirs) never prunes ancestors
        # that lie outside ``filepath``.
        for root, dirs, files in os.walk(filepath, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            if save_dir:
                continue
            for name in dirs:
                child = os.path.join(root, name)
                if os.path.islink(child):
                    # os.walk lists symlinked directories but does not
                    # descend into them; remove the link, not its target.
                    os.remove(child)
                else:
                    os.rmdir(child)
        if not save_dir:
            os.rmdir(filepath)

    @staticmethod
    def file_backups(old_dir, new_dir):
        """Copy the whole tree under ``old_dir`` into ``new_dir``.

        Missing destination directories (including parents of ``new_dir``)
        are created. Existing destination files are overwritten. Only file
        contents are copied.

        Args:
            old_dir: Source directory.
            new_dir: Backup directory.

        Returns:
            None. Prints a message and returns when ``old_dir`` is not a
            directory.

        Raises:
            shutil.SameFileError: if a source file and its destination are
                the same file (e.g. ``old_dir`` and ``new_dir`` coincide).
        """
        if not os.path.isdir(old_dir):
            print(f"傳入的{old_dir}不是一個目錄")
            return

        backup_abs = os.path.abspath(new_dir)
        for root, dirs, files in os.walk(old_dir):
            # Do not descend into the backup itself when it lives inside the
            # source tree, otherwise the backup would be copied into itself.
            dirs[:] = [
                d for d in dirs
                if os.path.abspath(os.path.join(root, d)) != backup_abs
            ]
            # Map by relative path; a plain str.replace would also rewrite
            # later path components that happen to contain ``old_dir``.
            relative = os.path.relpath(root, old_dir)
            if relative == os.curdir:
                new_root = new_dir
            else:
                new_root = os.path.join(new_dir, relative)
            os.makedirs(new_root, exist_ok=True)
            for name in files:
                shutil.copyfile(
                    os.path.join(root, name),
                    os.path.join(new_root, name),
                )
if __name__ == "__main__":
    # Nothing runs when the module is executed directly; the helpers are
    # meant to be imported and called through HanldeFile.
    pass
上面代碼已經測試過,測試過程我就不發了