1.最近幫醫院醫生處理原始MRI影像數據時,遇到了一些問題,寫此文章簡單記錄下。2.醫生的要求是把每個病人的MRI影像特定的序列(T2、DWI、DCE增強序列)的影像數據的特定區域分割出來;并按照序列由原來的dicom格式存儲成NIFTI(.nii)格式。
從醫院拷貝出來的原始數據存在以下幾種問題:
1)數據格式非常混亂
2)有些dicom文件無法讀取
3)部分文件夾為空
4)部分dicom文件大小為0KB
5)摻雜其他模態的影像數據,eg:鉬靶影像數據
6)MRI文件夾名字無規則(命名方式不易進行后處理)
7)所有dicom文件混亂在一起,沒有按序列存儲
面對這么“臟”的數據,拿到那一刻感覺要崩了,,,不過還好之前寫過類似的python腳本,修改下可以用來處理。以下便是預處理階段的python代碼:
1.刪除原始數據中的空文件以及空文件夾
import os
def del_empty(path):
    """Delete zero-byte files and empty sub-folders beneath ``path``.

    The tree is walked bottom-up, so a folder that becomes empty once its
    zero-byte files or empty child folders are gone is removed in the same
    pass.  ``path`` itself and its ancestors are never removed.

    Args:
        path: Root folder of the data to clean.
    """
    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            file_path = os.path.join(root, name)
            try:
                if os.path.getsize(file_path) == 0:
                    os.remove(file_path)
                    print('成功刪除文件' + file_path)
            except OSError as e:
                # Best effort: report the problem and keep cleaning.
                print('Exception', e)
        for name in dirs:
            dir_path = os.path.join(root, name)
            try:
                if os.listdir(dir_path):
                    continue  # still has content, leave it alone
                # os.rmdir (not os.removedirs) so parents are never pruned.
                os.rmdir(dir_path)
                print('成功刪除文件夾' + dir_path)
            except OSError as e:
                print('Exception', e)
定義路徑,調用函數執行
# Root folder of the raw data to clean; adjust to the local machine before running.
path1 = r'G:\2020年10月需要去皮膚數(shù)據(jù)\1'
# Module-level call: executes as soon as this script runs and deletes files in place.
del_empty(path1)
2.刪除包含MG(鉬靶)的文件夾
最好把原始數據進行備份后再執行此步驟
import shutil
def delete_MG(folder):
    """Delete every entry whose name contains "MG" inside each study folder.

    Only the direct children of each study folder are inspected.  Matching
    folders are removed recursively and matching files are removed; plain
    files that sit directly in ``folder`` are ignored.

    Args:
        folder: Folder that holds one sub-folder per study.
    """
    for every_study in os.listdir(folder):
        study_path = os.path.join(folder, every_study)
        if not os.path.isdir(study_path):
            continue
        for index, name in enumerate(os.listdir(study_path)):
            if "MG" not in name:
                continue
            target = os.path.join(study_path, name)
            if os.path.isdir(target):
                shutil.rmtree(target)
                print(index, name, target)
            else:
                os.remove(target)
# Destructive module-level call: removes every "MG" entry under each study folder of this path.
delete_MG(r"G:\去皮膚數(shù)據(jù)Copy\2")
3.重命名病歷號以及MRI文件夾名字
import glob
import SimpleITK as sitk #Note:注意SimpleITK不支持中文文搂,即路徑中不能有中文
import os
import pandas as pd
import pydicom
#重命名文件夾
def rename_study(folderPath):
    """Prefix the name of every entry directly under ``folderPath`` with "00".

    The function is not idempotent: each run adds another "00" prefix.

    Args:
        folderPath: Folder that holds one sub-folder per study.
    """
    count_study = 0
    # os.listdir returns a snapshot list, so renaming inside the loop is safe.
    for every_study in os.listdir(folderPath):
        source_path = os.path.join(folderPath, every_study)
        new_path = os.path.join(folderPath, '00' + every_study)
        print(source_path)
        print('..rename to.. ')
        print(new_path)
        os.rename(source_path, new_path)
        count_study += 1
    print('共有', count_study, '個(gè)study')
# Folder holding one sub-folder per study; every entry in it gets the "00" prefix.
path = r"G:\去皮膚數(shù)據(jù)Copy\2"
# Module-level call: renames on disk as soon as the script runs.
rename_study(path)
函數功能:遍歷每個study下的MR文件夾,并把文件夾名命名為dicom中的日期(StudyDate)
def get_Dcm_info(folderPath):
    """Rename each folder under ``<study>/MR`` to the StudyDate of its DICOMs.

    Only the first file of every folder is read (header only), because one
    file is enough to obtain the study date.  Empty folders, stray files under
    ``MR`` and folders whose target name is already taken are reported and
    left untouched instead of aborting the run.

    Args:
        folderPath: Folder that holds one sub-folder per study.
    """
    count_study = 0
    for every_study in os.listdir(folderPath):
        count_study += 1
        pattern = os.path.join(folderPath, every_study, "MR", "**")
        for every_last_path in glob.glob(pattern):
            if not os.path.isdir(every_last_path):
                continue
            print('oldDir', every_last_path)
            all_dicoms = os.listdir(every_last_path)
            if not all_dicoms:
                print('skip empty folder', every_last_path)
                continue
            # Header only: pixel data is not needed to read StudyDate.
            dcm = pydicom.dcmread(
                os.path.join(every_last_path, all_dicoms[0]),
                stop_before_pixels=True,
            )
            new_path = os.path.join(
                os.path.dirname(every_last_path), str(dcm.StudyDate)
            )
            print('newDir', new_path)
            if os.path.exists(new_path):
                if os.path.normpath(new_path) != os.path.normpath(every_last_path):
                    # Two exams share one StudyDate; never overwrite.
                    print('skip, target already exists', new_path)
                continue
            os.rename(every_last_path, new_path)
    print('共有', count_study, '個(gè)study')
preprocessFolder =r'G:\去皮膚數(shù)據(jù)Copy\2'  # folder that holds one sub-folder per study
get_Dcm_info(preprocessFolder)  # module-level call: renames the MR sub-folders on disk
4.查看所有病例MRI都有哪些序列
import re
import time
import json
#函數功能:讀取每個study下不同時間的dicom文件信息,并存入json
def get_dcmSeries_info(folderPath, saveJsonPath):
    """Collect the SeriesDescription values of every exam and append them as JSON.

    For each ``<study>/MR/<exam>`` folder, every file is read header-only and
    the distinct SeriesDescription values are gathered.  The result has the
    shape ``[{study: [{exam: [series, ...]}, ...]}, ...]``.

    The output file is opened in append mode, so calling this function several
    times with the same ``saveJsonPath`` concatenates several JSON documents
    in one file.

    Args:
        folderPath: Folder that holds one sub-folder per study.
        saveJsonPath: File the JSON summary is appended to.
    """
    count_study = 0
    total_List = []
    for every_study in os.listdir(folderPath):
        mr_root = os.path.join(folderPath, every_study, "MR")
        if not os.path.isdir(mr_root):
            continue  # not a study folder, or a study without MR data
        count_study += 1
        series_per_exam = []
        for exam_name in os.listdir(mr_root):
            exam_path = os.path.join(mr_root, exam_name)
            if not os.path.isdir(exam_path):
                continue
            series_names = set()
            for file_name in os.listdir(exam_path):
                file_path = os.path.join(exam_path, file_name)
                try:
                    # Only one tag is needed; skipping pixel data keeps the
                    # scan of every file fast.
                    dcm = pydicom.dcmread(
                        file_path,
                        stop_before_pixels=True,
                        specific_tags=["SeriesDescription"],
                    )
                except (OSError, pydicom.errors.InvalidDicomError) as e:
                    print('Exception', file_path, e)
                    continue
                description = getattr(dcm, "SeriesDescription", None)
                if description is not None:
                    series_names.add(str(description))
            # Sorted so the output is stable between runs.
            series_per_exam.append({exam_name: sorted(series_names)})
        total_List.append({every_study: series_per_exam})
    with open(saveJsonPath, 'a') as f:
        json.dump(total_List, f, indent=4)
    print('共有', count_study, '個(gè)study')
定義文件路徑以及存儲路徑,并進行文件夾遍歷
# Driver: scan every sub-folder of MRfolder_path and append its series summary
# to one JSON file, timing the whole run.
begin_time = time.time()
MRfolder_path =r'F:\預(yù)處理后需要去皮膚的數(shù)據(jù)'
saveJsonPath = "F:\\預(yù)處理后需要去皮膚的數(shù)據(jù)\\Series_result.json"
for item in os.listdir(MRfolder_path):
    every_path = os.path.join(MRfolder_path, item)
    if os.path.isdir(every_path):
        get_dcmSeries_info(every_path, saveJsonPath)
    else:
        # Report the entry that was skipped, not the root folder.
        print(every_path, "is not dir")
end_time = time.time()
print('run code total needs ', end_time - begin_time, ' s')
程序執行過程如下:會發現需要的時間還是很長的,因為需要遍歷每個MRI的每張dicom文件,如果哪位大佬有更好的方法歡迎批評指正。
執行完后會生成一個json文件,里面存放的是每個MRI文件夾下的所有序列名字,有這個我們可以知道此病人做了磁共振檢查的什么序列。如下圖所示:
5.對想要的DCE序列進行分類,并進行重命名
吐槽下:后來快做完了,才知道又要其他序列的(T2,DWI),,,,
不過思路都是差不多的,先找到對應序列的所有名字存在一個list中;然后遍歷每一張dicom,存到對應以SeriesDescription命名的文件夾下
# SeriesDescription values that are treated as DCE series by default.
DCE_Series = [
    't1_fl3d_tra_dyna_1+5',
    't1_fl3d_tra_dyna_1+5_NEW',
    'fl3d_dynamic_1-pre_3-post',
    'fl3d_dynamic_1-pre_6-post',
    't1_fl3d_tra_fs_1+5_p2',
]


def dicomSeriesClassifier(folderPath, savefolder, series_names=None,
                          subfolder="DCEMR"):
    """Copy the DICOM files of selected series into per-SeriesTime folders.

    Every ``<study>/MR/<exam>/*.dcm`` file whose SeriesDescription is one of
    the wanted names is copied to
    ``<savefolder>/<study>/<subfolder>/<exam>/<SeriesTime>``, where SeriesTime
    is the integer part of the tag with leading zeros dropped.

    Args:
        folderPath: Folder that holds one sub-folder per study.
        savefolder: Destination root; created on demand.
        series_names: SeriesDescription values to keep.  Defaults to
            ``DCE_Series``; pass another list to classify e.g. T2 or DWI.
        subfolder: Name of the folder created under each study in
            ``savefolder``.
    """
    wanted = set(DCE_Series if series_names is None else series_names)
    count_study = 0
    for every_study in os.listdir(folderPath):
        count_study += 1
        study_out = os.path.join(savefolder, every_study, subfolder)
        pattern = os.path.join(folderPath, every_study, "MR", "**", "**.dcm")
        for dcm_path in glob.glob(pattern):
            # Header only: the pixel data is copied with the file, not parsed.
            dcm = pydicom.dcmread(dcm_path, stop_before_pixels=True)
            if getattr(dcm, "SeriesDescription", None) not in wanted:
                continue
            # Files of one series share the integer part of SeriesTime.
            series_time = str(getattr(dcm, "SeriesTime", "")).split(".")[0]
            if not series_time.isdecimal():
                print('skip, no usable SeriesTime', dcm_path)
                continue
            exam_name = os.path.basename(os.path.dirname(dcm_path))
            target = os.path.join(study_out, exam_name, str(int(series_time)))
            os.makedirs(target, exist_ok=True)
            shutil.copy(dcm_path, target)
    print(count_study, "個(gè)病例MRI序列分類(lèi)完成")
--------調用函數執行
# Source folder (one sub-folder per study) and destination root for the classified copies.
folderPath1 = r"G:\去皮膚數(shù)據(jù)Copy\1"
savefolder = r'G:\去皮膚數(shù)據(jù)Copy\匯總分類(lèi)1'
# Module-level call: copies the matching DICOM files as soon as the script runs.
dicomSeriesClassifier(folderPath1,savefolder)
6.對每個DCE序列根據序列時間排序并進行重命名
(DCE序列一般有6個序列,是病人注射對比劑后不同時間下的成像,一般情況下惡性腫瘤的影像在不同時間信號強度也不一樣)
def _series_time_key(name):
    """Sort key: numeric folder names by integer value, all others after them."""
    if name.isdecimal():
        return (0, int(name), name)
    return (1, 0, name)


def renameDCESeries(folderPath):
    """Rename the sub-folders of every ``<study>/DCEMR/<exam>`` to DCE0000<n>.

    Sub-folders are ordered by name and numbered from 1.  Purely numeric names
    are compared as integers, because a plain string sort would put "101500"
    before "93015".  An existing target name is reported and never
    overwritten.

    Args:
        folderPath: Folder that holds one sub-folder per study.
    """
    count_study = 0
    for every_study in os.listdir(folderPath):
        count_study += 1
        pattern = os.path.join(folderPath, every_study, "DCEMR", "**")
        for exam_path in glob.glob(pattern):
            if not os.path.isdir(exam_path):
                continue
            ordered = sorted(os.listdir(exam_path), key=_series_time_key)
            for position, name in enumerate(ordered, start=1):
                oldDirName = os.path.join(exam_path, name)
                newDirName = os.path.join(exam_path, "DCE0000" + str(position))
                if not os.path.exists(newDirName):
                    os.rename(oldDirName, newDirName)
                else:
                    print(newDirName, "have alredy exists")
    print("已重命名", count_study, "個(gè)study")
#調用函數執行
# Module-level call: renames the per-SeriesTime folders under every study's DCEMR folder.
renameDCESeries(r"G:\去皮膚數(shù)據(jù)Copy\匯總分類(lèi)1")
上面的程序執行完后的效果如下圖:
7.對每個DCE序列的dicom文件按InstanceNumber重命名
InstanceNumber為dicom文件的tag信息,用microdicom或者Radiant DICOM Viewer打開影像文件時可以看到
def rename_dicom(folderPath):
    """Rename every DCE DICOM file to its zero-padded InstanceNumber.

    Files matching ``<study>/DCEMR/<exam>/<series>/*.dcm`` are renamed to a
    five-digit name such as ``00012.dcm``.  An InstanceNumber of 200 or more
    still triggers the warning but is now renamed as well; it used to raise a
    TypeError.  A file without InstanceNumber, or one whose target name is
    already used by another file, is reported and left untouched.

    Args:
        folderPath: Folder that holds one sub-folder per study.
    """
    count_study = 0
    for every_study in os.listdir(folderPath):
        count_study += 1
        pattern = os.path.join(
            folderPath, every_study, "DCEMR", "**", "**", "**.dcm"
        )
        for oldFileName in glob.glob(pattern):
            # Header only: InstanceNumber does not need the pixel data.
            dcm = pydicom.dcmread(oldFileName, stop_before_pixels=True)
            instance_number = getattr(dcm, "InstanceNumber", None)
            if instance_number is None:
                print('Warning!!', oldFileName, "has no InstanceNumber")
                continue
            instance_number = int(instance_number)
            if instance_number >= 200:
                print('Warning!!', instance_number, "dicom數(shù)量大于200")
            newFileName = os.path.join(
                os.path.dirname(oldFileName),
                str(instance_number).zfill(5) + ".dcm",
            )
            if newFileName == oldFileName:
                continue  # already named correctly
            if os.path.exists(newFileName):
                # os.rename would silently overwrite on POSIX.
                print('Warning!!', newFileName, "already exists, keeping",
                      oldFileName)
                continue
            os.rename(oldFileName, newFileName)
    print(count_study, "個(gè)病例DCE-MRI序列dicom重命名完成")
# Root of the classified data (one sub-folder per study, each with a DCEMR folder).
folderPath=r"G:\去皮膚數(shù)據(jù)Copy\匯總分類(lèi)1"
# Module-level call: renames the DICOM files on disk as soon as the script runs.
rename_dicom(folderPath)
上面的程序執行完后的效果如下圖(每張dicom按照順序存儲,命名方式也易于讀取):
對于乳腺皮膚如何去掉和把分割后的數據轉成nii格式,后面將詳細闡述
python醫學影像2Ddicom文件轉成3Dnii文件(保留原始dicom信息)
說明:本文為原創文章,轉載或引用請注明網址和標題;有不正確的地方歡迎批評指正nick.yu.jd@qq.com