假如 xml文件都藏在這些文件夾里面
文件夾里面還有子文件夾,多重子文件夾套娃
要注意里面的xml文件對應著同名的jpg文件!
處理后的結果,所有xml按照標簽分好類丟進對應的文件夾里面
按照 annotation/imgs/labels 創建了文件夾,對應的文件都在里面
import os
import xml.etree.ElementTree as ET
from shutil import copyfile
import pathlib as pathlib
from collections import defaultdict
def parse_obj(filename):
    """Read the object labels stored in an XML annotation file.

    Args:
        filename: Path (or file object) of the XML file to parse.

    Returns:
        A list with one ``{'name': label}`` dict for every ``<object>``
        element that is a direct child of the root, in document order.
        Objects that have no ``<name>`` child, or whose ``<name>`` is empty,
        are skipped instead of aborting the whole parse.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        # findtext() returns None when <name> is missing and '' when it is
        # empty; neither is a usable label.
        label = obj.findtext('name')
        if not label:
            continue
        objects.append({'name': label})
    return objects
def getallfiles(path):
    """Recursively collect the paths of all ``.xml`` files below *path*.

    Args:
        path: Root directory to walk; every level of sub-directory is visited.

    Returns:
        A list of paths (``dirpath`` joined with the file name) for every
        regular directory entry reported by ``os.walk`` as a file whose name
        ends in ``.xml``. Directories are never returned, even when their
        name ends in ``.xml``. The match is case-sensitive. An empty list is
        returned when nothing matches or *path* does not exist.
    """
    file_xml = []
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            # Compare the real suffix; the previous slice-based check also
            # accepted look-alikes such as ".xmp" or ".xmi".
            if name.endswith('.xml'):
                file_xml.append(os.path.join(dirpath, name))
    return file_xml
if __name__ == '__main__':
    # Group every XML annotation found under the source tree by the object
    # labels it contains, then copy each annotation together with its
    # same-named JPEG into <aim_root>/<label>/anotation and
    # <aim_root>/<label>/imgs. An empty <aim_root>/<label>/labels folder is
    # created as well; nothing is written into it here.
    filenames = getallfiles(r'D:\Yuqian_Yang\project_yolov4\yolo\data\smokephone\imgs')
    # Destination root. A raw string keeps the backslashes literal instead of
    # relying on Python leaving unknown escapes such as "\Y" untouched.
    aim_root = r'D:\Yuqian_Yang\project_yolov4\yolo\data\smoke'

    # Parse each annotation once: xml path -> list of {'name': label} dicts.
    recs = {name: parse_obj(name) for name in filenames}

    # label -> annotation paths containing at least one object of that label.
    # A path is recorded once per label even when the label occurs several
    # times in the same file, so the file is not copied repeatedly.
    copy_key = defaultdict(list)
    for name in filenames:
        seen_labels = set()
        for obj in recs[name]:
            label = str(obj['name'])
            if label in seen_labels:
                continue
            seen_labels.add(label)
            copy_key[label].append(name)

    os.makedirs(aim_root, exist_ok=True)

    for label, xml_paths in copy_key.items():
        class_dir = os.path.join(aim_root, label)
        # "anotation" (sic) is kept as the existing on-disk folder name.
        # exist_ok=True also repairs a label folder that is missing some of
        # its sub-folders from an earlier, interrupted run.
        for sub_dir in ('anotation', 'imgs', 'labels'):
            os.makedirs(os.path.join(class_dir, sub_dir), exist_ok=True)

        for xml_path in xml_paths:
            xml_file = pathlib.Path(xml_path)
            # Swap only the file suffix; a plain str.replace("xml", "jpg")
            # would also rewrite "xml" inside directory names.
            lower_jpg = xml_file.with_suffix('.jpg')
            upper_jpg = xml_file.with_suffix('.JPG')
            if lower_jpg.exists():
                image_file = lower_jpg
            elif upper_jpg.exists():
                image_file = upper_jpg
            else:
                print("Error: no such jpg file:", lower_jpg)
                continue

            try:
                # Copy the image that was actually found (".jpg" or ".JPG").
                copyfile(image_file, os.path.join(class_dir, 'imgs', image_file.name))
                copyfile(xml_file, os.path.join(class_dir, 'anotation', xml_file.name))
            except OSError as err:
                # Best effort: report the failed pair and continue with the rest.
                print("warning:", image_file, err)