import pydicom,kornia,skimage
from fastai.vision.all import *
from fastai.medical.imaging import *
from torchvision.utils import save_image

from fmi.pipeline import *
from fmi.explore import get_dicom_image

Mask & Save

Note: View Mask & Save tutorial on Kaggle (click on Kaggle icon)

def mask_and_save_path(file: (L), source=None, show=False, window=dicom_windows.lungs, sigma:float=0.1,\
                  thresh:float=0.9, save=False, save_path=None):
    """Crop each dicom in `file` to the bounding box of its blur mask, optionally saving and showing the crops.

    For every path in `file` the dicom is read, windowed with `window`, and a mask is
    built with `mask_from_blur(window, sigma=sigma, thresh=thresh, remove_max=False)`.
    The windowed image is then cropped to the box returned by `mask2bbox`.

    file: iterable of dicom file paths.
    source: accepted for signature parity with `mask_and_save_df`; not used here.
    show: when truthy, display up to the first 10 cropped images in one row.
    save: when truthy, write each crop to `{save_path}/{file stem}.png`.
    save_path: output directory (created if missing); required when `save` is truthy.

    Raises `ValueError` if `save` is truthy and `save_path` is None. Returns None.
    """
    if save:
        if save_path is None:
            raise ValueError("`save_path` must be provided when `save` is enabled")
        # Create the output folder once instead of checking on every iteration
        os.makedirs(save_path, exist_ok=True)
    preview = []
    for path in file:
        # `Path.stem` works with the native separator, so no per-platform editing is needed
        file_name = Path(str(path)).stem
        dcm = dcmread(path)
        wind = dcm.windowed(*window)
        mask = dcm.mask_from_blur(window, sigma=sigma, thresh=thresh, remove_max=False)
        lo,hi = mask2bbox(mask)
        imh = wind[lo[0]:hi[0],lo[1]:hi[1]]
        if save:
            save_image(imh, f'{save_path}/{file_name}.png')
        # Only the first 10 crops are ever displayed, so do not retain the rest
        if show and len(preview) < 10:
            preview.append(imh)
    if show and preview:
        show_images(preview, nrows=1)

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-1c40db12482e> in <module>
----> 1 show_doc(mask_and_save_path)

NameError: name 'show_doc' is not defined

def mask_and_save_df(file: (pd.DataFrame), source=None, show=False, window=dicom_windows.lungs, sigma:float=0.1,\
                  thresh:float=0.9, save=False, save_path=None):
    """Crop each dicom listed in the dataframe `file` to the bounding box of its blur mask.

    Each row must provide `PatientID` and `InstanceNumber`; the dicom is read from
    `{source}/{PatientID}/{InstanceNumber}.dcm`, windowed with `window`, masked with
    `mask_from_blur(window, sigma=sigma, thresh=thresh, remove_max=False)` and cropped
    to the box returned by `mask2bbox`.

    file: dataframe with `PatientID` and `InstanceNumber` columns (any index is fine).
    source: root folder containing one sub-folder per patient.
    show: when truthy, display up to the first 10 cropped images in one row.
    save: when truthy, write each crop to `{save_path}/{InstanceNumber}.png`.
    save_path: output directory (created if missing); required when `save` is truthy.

    Raises `ValueError` if `save` is truthy and `save_path` is None. Returns None.
    """
    if save:
        if save_path is None:
            raise ValueError("`save_path` must be provided when `save` is enabled")
        # Create the output folder once instead of checking on every iteration
        os.makedirs(save_path, exist_ok=True)
    preview = []
    # Iterate the column values directly: mixing `file.index` labels with positional
    # `iloc` lookups breaks as soon as the frame is sliced or filtered.
    for patient_id, instance in zip(file['PatientID'], file['InstanceNumber']):
        file_path = f"{source}/{patient_id}/{instance}.dcm"
        file_name = instance
        dcm = dcmread(file_path)
        wind = dcm.windowed(*window)
        mask = dcm.mask_from_blur(window, sigma=sigma, thresh=thresh, remove_max=False)
        lo,hi = mask2bbox(mask)
        imh = wind[lo[0]:hi[0],lo[1]:hi[1]]
        if save:
            save_image(imh, f'{save_path}/{file_name}.png')
        # Only the first 10 crops are ever displayed, so do not retain the rest
        if show and len(preview) < 10:
            preview.append(imh)
    if show and preview:
        show_images(preview, nrows=1)

fastai has a handy method, `from_dicoms`, that reads DICOM metadata and displays it in a dataframe.

m_items = get_dicom_files('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430')
source = 'D:/Datasets/osic-pulmonary-fibrosis-progression/train/'
dicom_dataframe = pd.DataFrame.from_dicoms(m_items)
dicom_dataframe[:2]

To see how `mask_and_save_df` works, here are 10 original images (we can use `get_dicom_image` to view them). To save the images in PNG format, set `save` to True and provide a `save_path`.

get_dicom_image(dicom_dataframe[:10], 'PatientID', source=source, nrows=1)

Using a sigma of 0.2 and a lung window

mask_and_save_df(dicom_dataframe[:10], window=dicom_windows.lungs, show=True, source=source, sigma=0.2, save=False)

mask_and_save_df(dicom_dataframe[:10], window=dicom_windows.lungs, show=True, source=source, sigma=0.1, save=False)

mask_and_save_df(dicom_dataframe[:10], window=dicom_windows.lungs, show=True, source=source, sigma=0.01, save=False)

Setting the sigma value to 0.1 reduces the image area only to the areas that are important

Dicom metadata dict

Updated from_dicoms method to from_dicoms2 that allows you to choose the window setting

@patch
def updated_dict(self:DcmDataset, windows=dicom_windows.lungs):
    """Return the dicom's metadata as a dict, plus pixel statistics for a chosen window.

    The dict maps each element's keyword to its value for every element except the
    `(0x7fe0,0x0010)` one (`pxdata`), and adds:
      - `fname`: `self.filename`
      - `img_min`, `img_max`, `img_mean`, `img_std`: statistics of `self.pixel_array`
      - `img_pct_window`: `self.pct_in_window(*windows)`
      - `file_path`: `'{PatientID}/{InstanceNumber}.dcm'`

    windows: a single dicom window such as `dicom_windows.lungs`; it is star-unpacked
        into `pct_in_window`. A one-element list wrapping a window (the previous
        default form) is also accepted.
    """
    # Unwrap the legacy `[window]` form so it is unpacked the same way as a bare window
    if len(windows) == 1: windows = windows[0]
    pxdata = (0x7fe0,0x0010)
    vals = [self[o] for o in self.keys() if o != pxdata]
    res = {v.keyword: v.value for v in vals}
    res['fname'] = self.filename

    pxs = self.pixel_array
    for f in ('min', 'max', 'mean', 'std'): res['img_'+f] = getattr(pxs, f)()
    res['img_pct_window'] = self.pct_in_window(*windows)
    res['file_path'] = f'{self.PatientID}/{self.InstanceNumber}.dcm'
    return res

def _dcm2dict2(fn, windows, **kwargs):
    "Read the dicom at `fn` and return its `updated_dict` for `windows` (extra kwargs are forwarded)"
    dcm = fn.dcmread()
    return dcm.updated_dict(windows, **kwargs)

@delegates(parallel)
def _from_dicoms2(cls, fns, n_workers=0, **kwargs):
    "Build a `pd.DataFrame` with one row per file in `fns`, each row produced by `_dcm2dict2`"
    # Extra keyword arguments (e.g. `windows`) are handed to `parallel` together with `n_workers`
    rows = parallel(_dcm2dict2, fns, n_workers=n_workers, **kwargs)
    return pd.DataFrame(rows)
# Expose the function as the `pd.DataFrame.from_dicoms2` classmethod
pd.DataFrame.from_dicoms2 = classmethod(_from_dicoms2)

from_dicoms2 allows you to set the dicom window, for example in this case the mediastinum window is used

dicom_dataframe = pd.DataFrame.from_dicoms2(m_items, windows=dicom_windows.mediastinum)
dicom_dataframe[:2]

In this case the lungs window was used

dicom_dataframe = pd.DataFrame.from_dicoms2(m_items, windows=dicom_windows.lungs)
dicom_dataframe[:2]

Move Files

Note: View dicom dataframe and move_files tutorial on Kaggle (click on Kaggle icon)

def move_files(df, source, save_path):
    """Copy the dicom files listed in `df` into one folder per patient under `save_path`.

    Despite the name, files are copied with `shutil.copy`; the originals stay in place.

    df: dataframe with `PatientID`, `img_pct_window` and `fname` columns.
    source: dataset root; each file is read from `{source}/train/{PatientID}/{stem}.dcm`,
        where `stem` is the file name in `fname` without its extension.
    save_path: destination root; `{save_path}/{PatientID}` is created when missing.

    Prints one progress line per row. Returns None.
    """
    # PureWindowsPath splits on both '/' and '\\', so `fname` values recorded on
    # Windows or POSIX are parsed correctly regardless of the current platform.
    from pathlib import PureWindowsPath

    # Iterate column values directly so the result does not depend on the index labels
    for patid, pct, fname in zip(df.PatientID, df.img_pct_window, df.fname):
        patid = str(patid)
        window = str(pct)
        img = PureWindowsPath(str(fname)).stem
        print(f'ID: {patid} window: {window} instance: {img}')

        # Join as paths so `save_path` works with or without a trailing separator
        folder_path = Path(save_path) / patid
        os.makedirs(folder_path, exist_ok=True)
        img_file = Path(f'{source}/train/{patid}/{img}.dcm')
        shutil.copy(img_file, folder_path, follow_symlinks=True)

dicom convert 3channel

def dicom_convert_3channel(fn:(Path,str), save_dir:(str), win1=dicom_windows.lungs, \
                           win2=dicom_windows.liver, win3=dicom_windows.brain):
    """Save a dicom as a 3-channel jpg, with one dicom window per channel.

    The dicom at `fn` is windowed three times (`win1`, `win2`, `win3`); the results
    are stacked as channels, in that order, and written to `{save_dir}/{stem}.jpg`,
    where `stem` is the file name of `fn` without its extension.

    fn: path of the dicom file to convert.
    save_dir: output directory (created if missing).
    win1, win2, win3: dicom windows used for the first, second and third channel.

    Returns None.
    """
    data = dcmread(fn)
    # `Path.stem` handles the native separator, so this works beyond Windows paths
    name = Path(str(fn)).stem

    # Stack the three windowed images along a trailing axis, then move channels first
    image = np.stack([data.windowed(*win) for win in (win1, win2, win3)], axis=2)
    ten_image = TensorImage(image).permute(2,0,1)
    Path(save_dir).mkdir(parents=True, exist_ok=True)
    save_image(ten_image, f'{save_dir}/{name}.jpg')

To see how dicom_convert_3channel works, specify a save directory and choose a test file

save_dir = 'D:/Datasets/osic-pulmonary-fibrosis-progression/test3c/'
test1 = m_items[12]
test1

Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00007637202177411956430/20.dcm')

Choose 3 windows, one for each channel, in this case lungs, mediastinum and pe

dicom_convert_3channel(test1, save_dir, win1=dicom_windows.lungs, win2=dicom_windows.mediastinum, win3=dicom_windows.pe)

Load the saved image which saves with the same name as the input image name

saved_image = PILImage.create('D:/Datasets/osic-pulmonary-fibrosis-progression/test3c/20.jpg')
saved_ten = TensorImage(saved_image)
saved_ten.shape

torch.Size([512, 512, 3])

# Show the combined image followed by each channel; titles follow the windows chosen above
show_images([saved_ten[:,:,:], saved_ten[:,:,0], saved_ten[:,:,1], saved_ten[:,:,2]],\
            titles=['all_channels', 'lungs', 'mediastinum', 'pe'])

Dicom Splitter

Note: View dicom splitter tutorial on kaggle (click Kaggle icon)

def dicomsplit(valid_pct=0.2, seed=None, **kwargs):
    """Create a function that randomly splits dicom items into train/valid indices.

    The returned function takes the items `o`, shuffles their indices with
    `torch.randperm` (seeded with `seed` when it is not None) and uses the first
    `int(valid_pct * len(o))` shuffled indices as the validation set. It then reads
    every file, prints the split together with the patient IDs, shows up to 20 images
    per set, and checks whether any `PatientID` appears in both sets.

    When a patient appears in both sets, the training indices belonging to those
    patients are dropped from the returned training indices; otherwise the training
    indices are returned unchanged. Either way the result is `(train_idxs, valid_idxs)`.

    `**kwargs` is accepted but not used.
    """
    def _inner(o, **kwargs):
        if seed is not None: torch.manual_seed(seed)
        rand_idx = L(int(i) for i in torch.randperm(len(o)))
        cut = int(valid_pct * len(o))
        trn, val = rand_idx[cut:], rand_idx[:cut]

        def _read(idxs):
            "Read each selected file once and collect its patient ID and pixel array"
            patients, images = [], []
            for fn in o[idxs]:
                dcm = dcmread(fn)
                patients.append(dcm.PatientID)
                images.append(dcm.pixel_array)
            return patients, images

        train_patient, train_images = _read(trn)
        val_patient, val_images = _read(val)

        print(rand_idx)
        print(f'Train: {trn}, {train_patient}')
        # Skip the preview when a set is empty (e.g. a very small `valid_pct`)
        if train_images: show_images(train_images[:20])
        print(f'Val: {val}, {val_patient}')
        if val_images: show_images(val_images[:20])
        is_duplicate = set(train_patient) & set(val_patient)
        print(f'Duplicate: {is_duplicate}')
        if is_duplicate:
            print('duplicate exists')
            # Keep indices (not patient IDs) so both branches return the same kind of value
            new_list = L(idx for idx, pat in zip(trn, train_patient) if pat not in is_duplicate)
            print(f'New List: {new_list}')
        else:
            print('duplicate does NOT exist')
            new_list = trn
        return new_list, val
    return _inner

def check_duplicate(items, seed=5):
    "Run `dicomsplit` on `items` with a 20% validation split and the given `seed`, returning its two results"
    splitter = dicomsplit(valid_pct=0.2, seed=seed)
    train_part, valid_part = splitter(items)
    return train_part, valid_part

def dicom_splitter(items, valid_pct=0.2, seed=77):
    """Create a splitter function whose validation indices are chosen by `dicomsplit`.

    `dicomsplit(valid_pct=valid_pct, seed=seed)` is run once on `items`, and the
    validation indices it returns are fixed. Only those validation indices are
    used; the first value returned by `dicomsplit` is ignored.

    The returned function takes a collection `o` and returns `(train_idxs, valid_idxs)`,
    where the training indices are every index of `o` that is not a validation index.
    It also prints both index sets.
    """
    # Forward `seed` so the split is reproducible (it was previously accepted but ignored)
    _, valid_idx = dicomsplit(valid_pct=valid_pct, seed=seed)(items)
    def _inner(o):
        train_idx = np.setdiff1d(np.array(range_of(o)), np.array(valid_idx))
        print(f'train:{train_idx} val:{valid_idx}')
        return L(train_idx, use_list=True), L(valid_idx, use_list=True)
    return _inner

Check to see how dicom_splitter works. First create a function that chooses 10 random indices between 0 and the number of items, and returns the items at those indices.

def random_(items, value=10):
    """Return `value` randomly chosen elements of `items` (the same element may be chosen more than once).

    items: a collection that supports `len` and indexing with a list of positions.
    value: how many elements to draw.

    Raises `ValueError` (from `random.randrange`) if `items` is empty and `value > 0`.
    """
    n_items = len(items)
    # `randrange(n)` excludes `n`; `randint(0, n)` includes it and could index past the end
    picks = [random.randrange(n_items) for _ in range(value)]
    return items[picks]

items = get_dicom_files(source)
rand_items = random_(items)
rand_items

(#10) [Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00173637202238329754031/207.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00020637202178344345685/119.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00136637202224951350618/161.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00317637202283194142136/178.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00378637202298597306391/172.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00020637202178344345685/70.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00343637202287577133798/34.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00196637202246668775836/36.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00075637202198610425520/148.dcm'),Path('D:/Datasets/osic-pulmonary-fibrosis-progression/train/ID00111637202210956877205/171.dcm')]

check_duplicate shows the indices of the random 10 images chosen above. By default the train/valid split is 80/20, so the train set has 8 images and the valid set has 2 images. We can view the 8 images in the train set and the 2 images in the valid set.

set_seed(70)
check_duplicate(rand_items, seed=70)

[4, 9, 0, 7, 2, 1, 6, 3, 5, 8]
Train: [0, 7, 2, 1, 6, 3, 5, 8], ['ID00173637202238329754031', 'ID00196637202246668775836', 'ID00136637202224951350618', 'ID00020637202178344345685', 'ID00343637202287577133798', 'ID00317637202283194142136', 'ID00020637202178344345685', 'ID00075637202198610425520']
Val: [4, 9], ['ID00378637202298597306391', 'ID00111637202210956877205']
Duplicate: set()
duplicate does NOT exist

((#8) [0,7,2,1,6,3,5,8], (#2) [4,9])

In the example above there are no duplicates so the train/valid sets remain the same

set_seed(7)
check_duplicate(rand_items, seed=7)

[5, 0, 3, 4, 1, 7, 9, 6, 8, 2]
Train: [3, 4, 1, 7, 9, 6, 8, 2], ['ID00317637202283194142136', 'ID00378637202298597306391', 'ID00020637202178344345685', 'ID00196637202246668775836', 'ID00111637202210956877205', 'ID00343637202287577133798', 'ID00075637202198610425520', 'ID00136637202224951350618']
Val: [5, 0], ['ID00020637202178344345685', 'ID00173637202238329754031']
Duplicate: {'ID00020637202178344345685'}
duplicate exists
New List: ['ID00317637202283194142136', 'ID00378637202298597306391', 'ID00196637202246668775836', 'ID00111637202210956877205', 'ID00343637202287577133798', 'ID00075637202198610425520', 'ID00136637202224951350618']

(['ID00317637202283194142136',
  'ID00378637202298597306391',
  'ID00196637202246668775836',
  'ID00111637202210956877205',
  'ID00343637202287577133798',
  'ID00075637202198610425520',
  'ID00136637202224951350618'],
 (#2) [5,0])

	SpecificCharacterSet	ImageType	SOPInstanceUID	Modality	Manufacturer	ManufacturerModelName	PatientName	PatientID	PatientSex	DeidentificationMethod	...	ImageOrientationPatient3	ImageOrientationPatient4	ImageOrientationPatient5	MultiPixelSpacing	PixelSpacing1	img_min	img_max	img_mean	img_std	img_pct_window
0	ISO_IR 100	ORIGINAL	2.25.12297650151329871895440507938349160734	CT	GE MEDICAL SYSTEMS	LightSpeed VCT	(I, D, 0, 0, 0, 0, 7, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 1, 1, 9, 5, 6, 4, 3, 0)	ID00007637202177411956430		Table;	...	0.0	1.0	0.0	1	0.652344	-2000	2842	-1.454884	1137.488858	0.058094
1	ISO_IR 100	ORIGINAL	2.25.37611372879908126511187998276199853341	CT	GE MEDICAL SYSTEMS	LightSpeed VCT	(I, D, 0, 0, 0, 0, 7, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 1, 1, 9, 5, 6, 4, 3, 0)	ID00007637202177411956430		Table;	...	0.0	1.0	0.0	1	0.652344	-2000	2918	19.038597	1138.876560	0.068130

	SpecificCharacterSet	ImageType	SOPInstanceUID	Modality	Manufacturer	ManufacturerModelName	PatientName	PatientID	PatientSex	DeidentificationMethod	...	RescaleIntercept	RescaleSlope	RescaleType	fname	img_min	img_max	img_mean	img_std	img_pct_window	file_path
0	ISO_IR 100	[ORIGINAL, PRIMARY, AXIAL]	2.25.12297650151329871895440507938349160734	CT	GE MEDICAL SYSTEMS	LightSpeed VCT	(I, D, 0, 0, 0, 0, 7, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 1, 1, 9, 5, 6, 4, 3, 0)	ID00007637202177411956430		Table;	...	-1024.0	1.0	HU	D:\Datasets\osic-pulmonary-fibrosis-progression\train\ID00007637202177411956430\1.dcm	-2000	2842	-1.454884	1137.488858	0.219498	ID00007637202177411956430/1.dcm
1	ISO_IR 100	[ORIGINAL, PRIMARY, AXIAL]	2.25.37611372879908126511187998276199853341	CT	GE MEDICAL SYSTEMS	LightSpeed VCT	(I, D, 0, 0, 0, 0, 7, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 1, 1, 9, 5, 6, 4, 3, 0)	ID00007637202177411956430		Table;	...	-1024.0	1.0	HU	D:\Datasets\osic-pulmonary-fibrosis-progression\train\ID00007637202177411956430\10.dcm	-2000	2918	19.038597	1138.876560	0.248634	ID00007637202177411956430/10.dcm

	SpecificCharacterSet	ImageType	SOPInstanceUID	Modality	Manufacturer	ManufacturerModelName	PatientName	PatientID	PatientSex	DeidentificationMethod	...	RescaleIntercept	RescaleSlope	RescaleType	fname	img_min	img_max	img_mean	img_std	img_pct_window	file_path
0	ISO_IR 100	[ORIGINAL, PRIMARY, AXIAL]	2.25.12297650151329871895440507938349160734	CT	GE MEDICAL SYSTEMS	LightSpeed VCT	(I, D, 0, 0, 0, 0, 7, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 1, 1, 9, 5, 6, 4, 3, 0)	ID00007637202177411956430		Table;	...	-1024.0	1.0	HU	D:\Datasets\osic-pulmonary-fibrosis-progression\train\ID00007637202177411956430\1.dcm	-2000	2842	-1.454884	1137.488858	0.702209	ID00007637202177411956430/1.dcm
1	ISO_IR 100	[ORIGINAL, PRIMARY, AXIAL]	2.25.37611372879908126511187998276199853341	CT	GE MEDICAL SYSTEMS	LightSpeed VCT	(I, D, 0, 0, 0, 0, 7, 6, 3, 7, 2, 0, 2, 1, 7, 7, 4, 1, 1, 9, 5, 6, 4, 3, 0)	ID00007637202177411956430		Table;	...	-1024.0	1.0	HU	D:\Datasets\osic-pulmonary-fibrosis-progression\train\ID00007637202177411956430\10.dcm	-2000	2918	19.038597	1138.876560	0.720448	ID00007637202177411956430/10.dcm

preprocessing

Mask & Save

`mask_and_save_df`[source]

Dicom metadata dict

Move Files

`move_files`[source]

dicom convert 3channel

`dicom_convert_3channel`[source]

Dicom Splitter

`dicomsplit`[source]

`dicom_splitter`[source]

Mask & Save

mask_and_save_df[source]

Dicom metadata dict

Move Files

move_files[source]

dicom convert 3channel

dicom_convert_3channel[source]

Dicom Splitter

dicomsplit[source]

dicom_splitter[source]

`mask_and_save_df`[source]

`move_files`[source]

`dicom_convert_3channel`[source]

`dicomsplit`[source]

`dicom_splitter`[source]