處理文件

file_encoding(filepath) ：小文件的時候調用，獲取文件的編碼，可以傳入open()函數的encoding參數。filepath是個文件
file_encoding_big(filepath) ：同上，區別是大文件調用，返回文件編碼。filepath是個文件
change_to_utf8(filepath) ：解決目錄下所有文件亂碼的情況。不管是csv還是文本還是2進制文件。filepath是個文件夾
clear_dir(filepath, save_dir=True)：將目錄清空，save_dir=False表示這個目錄刪除
file_backups(old_dir, new_dir)：目錄整體備份，從old_dir備份到new_dir文件夾

# -*- coding: utf-8 -*-
"""
===========================
# @Time : 2020/8/5 16:39
# @File  : handele_file.py
# @Author: adeng
# @Date  : 2020/8/5
============================
"""

import chardet
import os
from datetime import datetime


class HanldeFile():
    """
    File-handling helpers, all exposed as static methods.

    file_encoding:     detect the encoding of a small file (reads it whole).
    file_encoding_big: detect the encoding of a large file by sampling chunks.
    change_to_utf8:    re-encode every file under a directory tree as UTF-8.
    clear_dir:         delete every file under a directory tree, and
                       optionally the directories themselves.
    file_backups:      copy a directory tree to another location.

    NOTE: the class name is misspelled ("Hanlde"); it is kept as-is so that
    existing callers keep working.
    """

    @staticmethod
    def file_encoding(filepath):
        """
        Return the encoding chardet detects for a file.

        The whole file is read into memory, so use this for small files only.

        filepath: path of a regular file (not a directory).
        Returns the detected encoding name, or None when ``filepath`` is not
        a file or chardet reports no encoding.
        """
        if not os.path.isfile(filepath):
            print("這不是一個文件")
            return None
        with open(filepath, "rb") as f1:
            data = f1.read()
        # chardet returns {'encoding': ..., 'confidence': ..., 'language': ...}
        result = chardet.detect(data)
        print(result)
        return result.get("encoding")

    @staticmethod
    def file_encoding_big(filepath):
        """
        Return the most frequently detected encoding among sampled chunks.

        Up to 50 chunks of 6000 bytes are read from the start of the file and
        each chunk is passed to chardet separately; the encoding reported for
        the most chunks wins (ties go to the encoding seen first).

        NOTE: an empty file is deleted and recreated as an empty file opened
        with UTF-8, and "utf-8" is returned for it.

        filepath: path of a regular file (not a directory).
        Returns the winning encoding name, or None when ``filepath`` is not a
        file or no chunk yielded an encoding.
        """
        if not os.path.isfile(filepath):
            print("這不是一個文件")
            return None

        chunk_size = 6000
        max_chunks = 50
        detected = []
        saw_data = False
        with open(filepath, mode="rb") as f:
            for _ in range(max_chunks):
                data = f.read(chunk_size)
                if not data:
                    break
                saw_data = True
                # NOTE(review): a fixed-size chunk may end in the middle of a
                # multi-byte character; the majority vote below is what keeps
                # a single odd chunk from deciding the result.
                guess = chardet.detect(data).get("encoding")
                if guess:
                    detected.append(guess)
        print(detected)

        if not saw_data:
            # The very first read returned nothing: the file is empty.
            print("此文件為空，刪除文件重新創建一個編碼為【utf-8】的同文件名")
            os.remove(filepath)
            with open(filepath, mode="w", encoding="utf-8") as new_file:
                pass
            return new_file.encoding

        if not detected:
            return None

        # Tally per encoding. Dicts keep insertion order and max() returns
        # the first maximal key, so ties are resolved deterministically.
        tally = {}
        for name in detected:
            tally[name] = tally.get(name, 0) + 1
        print(tally)
        return max(tally, key=tally.get)

    @staticmethod
    def change_to_utf8(filepath):
        """
        Re-encode every file under a directory tree as UTF-8.

        Each file's encoding is detected with ``file_encoding_big``; the file
        is decoded with it, written to a temporary sibling file as UTF-8, and
        the temporary file then replaces the original. Line endings are
        preserved. Files whose encoding cannot be detected, or that fail to
        decode/encode (for example binary files), are left untouched.

        filepath: path of a directory.
        Returns None.
        """
        if not os.path.isdir(filepath):
            print(f"{filepath}這不是一個目錄")
            return
        for root, _dirs, files in os.walk(filepath):
            for name in files:
                old_path = os.path.join(root, name)
                source_encoding = HanldeFile.file_encoding_big(old_path)
                if source_encoding is None:
                    # Nothing reliable to decode with; do not guess.
                    continue
                stamp = datetime.strftime(datetime.now(), '%Y%m%d%H%M%S')
                write_path = os.path.join(root, f"{stamp}" + name)
                try:
                    # newline="" disables newline translation in both
                    # directions so CRLF/LF are written back unchanged.
                    with open(old_path, mode="r", encoding=source_encoding,
                              newline="") as read_old:
                        data = read_old.read()
                    with open(write_path, mode="w", encoding="utf8",
                              newline="") as write_new:
                        write_new.write(data)
                except (UnicodeError, LookupError):
                    # Wrong guess or an encoding name Python does not know:
                    # drop any partial output and keep the original bytes.
                    if os.path.exists(write_path):
                        os.remove(write_path)
                    continue
                # Swap the converted file into place in a single step.
                os.replace(write_path, old_path)

    @staticmethod
    def clear_dir(filepath, save_dir=True):
        """
        Delete every file under a directory tree.

        filepath: path of a directory.
        save_dir: when True (default) the directory structure is kept and
                  only files are removed; when False the sub-directories and
                  ``filepath`` itself are removed as well. Directories above
                  ``filepath`` are never touched.
        Returns None.
        """
        if not os.path.isdir(filepath):
            print(f"{filepath}這不是一個目錄")
            return
        # Walk bottom-up so every directory is already empty by the time its
        # parent tries to remove it.
        for root, dirs, files in os.walk(filepath, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            if save_dir:
                continue
            for name in dirs:
                child = os.path.join(root, name)
                if os.path.islink(child):
                    # os.walk lists symlinks to directories under dirs but
                    # does not descend into them; remove the link itself.
                    os.remove(child)
                else:
                    os.rmdir(child)
        if not save_dir:
            os.rmdir(filepath)

    @staticmethod
    def file_backups(old_dir, new_dir):
        """
        Copy a directory tree from ``old_dir`` to ``new_dir``.

        Sub-directories are recreated under ``new_dir`` (missing parent
        directories are created) and every file is copied byte-for-byte.
        Files that already exist at the destination are overwritten.

        old_dir: directory to back up.
        new_dir: directory that receives the copy.
        Returns None.
        """
        if not os.path.isdir(old_dir):
            print(f"傳入的{old_dir}不是一個目錄")
            return
        chunk_size = 1024 * 1024
        for root, _dirs, files in os.walk(old_dir):
            # Map by relative path; a plain str.replace would also rewrite
            # any later path component that happens to contain old_dir.
            relative = os.path.relpath(root, old_dir)
            if relative == os.curdir:
                new_root = new_dir
            else:
                new_root = os.path.join(new_dir, relative)
            os.makedirs(new_root, exist_ok=True)
            for name in files:
                old_filename = os.path.join(root, name)
                new_filename = os.path.join(new_root, name)
                with open(old_filename, mode="rb") as f1, \
                        open(new_filename, "wb") as f2:
                    # Fixed-size chunks keep memory bounded even for binary
                    # files that contain no newline bytes.
                    for chunk in iter(lambda: f1.read(chunk_size), b""):
                        f2.write(chunk)


# Script entry point: intentionally empty, running this module directly does nothing.
if __name__ == '__main__':
    pass

上面代碼已經測試過，測試過程我就不發了

實戰：文件處理：獲取編碼，解決亂碼，備份目錄，清空文件

實戰：文件處理：獲取編碼，解決亂碼，備份目錄，清空文件

處理文件