我试图运行以下脚本,据说这对某些人有效:
"""Preprocessing script.
This script walks over the directories and dump the frames into a csv file
"""
import os
import csv
import sys
import random
import scipy
from scipy import misc as m
import numpy as np
import dicom
from skimage import io, transform
from joblib import Parallel, delayed
import dill
os.chdir("P:\\hackr\\dsb")
def mkdir(fname):
try:
os.mkdir(fname)
except:
pass
def get_frames(root_path):
"""Get path to all the frame in view SAX and contain complete frames"""
ret = []
for root, _, files in os.walk(root_path):
if len(files) == 0 or not files[0].endswith(".dcm") or root.find("sax") == -1:
continue
prefix = files[0].rsplit('-', 1)[0]
fileset = set(files)
expected = ["%s-%04d.dcm" % (prefix, i + 1) for i in range(30)]
if all(x in fileset for x in expected):
ret.append([root + "/" + x for x in expected])
# sort for reproduciblity
return sorted(ret, key = lambda x: x[0])
def get_label_map(fname):
labelmap = {}
fi = open(fname)
fi.readline()
for line in fi:
arr = line.split(',')
labelmap[int(arr[0])] = line
return labelmap
def write_label_csv(fname, frames, label_map):
fo = open(fname, "w")
for lst in frames:
index = int(lst[0].split("\\")[1]) #changed from split("/")[3]
if label_map != None:
fo.write(label_map[index])
else:
fo.write("%d,0,0\n" % index)
fo.close()
def get_data(lst,preproc):
data = []
result = []
for path in lst:
f = dicom.read_file(path)
img = preproc(f.pixel_array.astype(float) / np.max(f.pixel_array))
dst_path = path.rsplit(".", 1)[0] + ".64x64.jpg"
m.imsave(dst_path, img)
result.append(dst_path)
data.append(img)
data = np.array(data, dtype=np.uint8)
data = data.reshape(data.size)
data = np.array(data,dtype=np.str_)
data = data.reshape(data.size)
return [data,result]
def write_data_csv(fname, frames, preproc):
"""Write data to csv file"""
#if __name__ == 'builtins': #'__main__':
fdata = open(fname, "w")
dr = Parallel()(delayed(get_data)(lst,preproc) for lst in frames)
data,result = zip(*dr)
for entry in data:
fdata.write(','.join(entry)+'\r\n')
print("All finished, %d slices in total" % len(data))
fdata.close()
result = np.ravel(result)
return result
def crop_resize(img, size):
"""crop center and resize"""
if img.shape[0] < img.shape[1]:
img = img.T
# we crop image from center
short_egde = min(img.shape[:2])
yy = int((img.shape[0] - short_egde) / 2)
xx = int((img.shape[1] - short_egde) / 2)
crop_img = img[yy : yy + short_egde, xx : xx + short_egde]
# resize to 64, 64
resized_img = transform.resize(crop_img, (size, size))
resized_img *= 255
return resized_img.astype("uint8")
def local_split(train_index):
random.seed(0)
train_index = set(train_index)
all_index = sorted(train_index)
num_test = int(len(all_index) / 3)
random.shuffle(all_index)
train_set = set(all_index[num_test:])
test_set = set(all_index[:num_test])
return train_set, test_set
def split_csv(src_csv, split_to_train, train_csv, test_csv):
ftrain = open(train_csv, "w")
ftest = open(test_csv, "w")
cnt = 0
for l in open(src_csv):
if split_to_train[cnt]:
ftrain.write(l)
else:
ftest.write(l)
cnt = cnt + 1
ftrain.close()
ftest.close()
# Load the list of all the training frames, and shuffle them
# Shuffle the training frames
random.seed(10)
train_frames = get_frames("./train_tiny")
random.shuffle(train_frames)
validate_frames = get_frames("./validate_tiny")
# Write the corresponding label information of each frame into file.
write_label_csv("train-label.csv", train_frames, get_label_map("./train.csv"))
write_label_csv("validate-label.csv", validate_frames, None)
# Dump the data of each frame into a CSV file, apply crop to 64 preprocessor
train_lst = write_data_csv("train-64x64-data.csv", train_frames, lambda x: crop_resize(x, 64))
valid_lst = write_data_csv("./validate-64x64-data.csv", validate_frames, lambda x: crop_resize(x, 64))
# Generate local train/test split, which you could use to tune your model locally.
train_index = np.loadtxt("./train-label.csv", delimiter=",")[:,0].astype("int")
train_set, test_set = local_split(train_index)
split_to_train = [x in train_set for x in train_index]
split_csv("./train-label.csv", split_to_train, "./local_train-label.csv", "./local_test-label.csv")
split_csv("./train-64x64-data.csv", split_to_train, "./local_train-64x64-data.csv", "./local_test-64x64-data.csv")
引发错误。当我以交互方式运行代码时,我发现它是打破脚本的write_data_csv
调用。
例如,在使用write_data_csv
之前,脚本运行时没有错误,但是行valid_lst = write_data_csv("./validate-64x64-data.csv", validate_frames, lambda x: crop_resize(x, 64))
会抛出此错误:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "<input>", line 5, in write_data_csv
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 804, in __call__
while self.dispatch_one_batch(iterator):
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 657, in dispatch_one_batch
tasks = BatchedCalls(itertools.islice(iterator, batch_size))
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 68, in __init__
self.items = list(iterator_slice)
File "<input>", line 5, in <genexpr>
File "E:\Users\hackr\Python\python35\lib\site-packages\joblib\parallel.py", line 161, in delayed
pickle.dumps(function)
_pickle.PicklingError: Can't pickle <function get_data at 0x0000000007B432F0>: attribute lookup get_data on __main__ failed
我不确定我的下一步行动是解决这个问题。我发现其他StackOverflow问题的属性查找失败,但它们实质上是不同的或没有答案。