I have a long Python 3 script that oversamples many images, fits each one to a 2D Gaussian model to find the relevant parameters of the source in the image (centroid coordinates, amplitude, etc.), shifts it to the center, normalizes it, and then stacks all the results with a median. I'm trying to do this with data from about 50,000 images. I've already extracted and saved the data with a previous script, which stored all the image data (numpy arrays) in a binary file named initial_data.data, and I also used astropy's wcs package to find guess centroid coordinates, which are saved in the same order in centroid_coords.data. The first time I tried to run my code on all the images, it said Killed during the oversampling part. I then decided to try splitting the images into subsections and running the code on one subsection at a time. That would be fine with only a few subsections, but if I can only handle 5,000 images at a time, it becomes far too tedious and impractical! Below is the rest of my code, where I try splitting the images into thirds and operate on just the first subsection (so I entered 1 at the first user-input prompt).
import pickle
import numpy as np
import re
from scipy import optimize, interpolate
from astropy.io import fits
from astropy.nddata import Cutout2D
frac = int(input('Subset 1, 2, or 3? ')) # select which subset of images to operate on
oversampled_file = 'oversampled_images_frac%d.data' % (frac)
params_file = 'gparams_2D_frac%d.data' % (frac)
final_images_file = 'images_b4stacking_2D_frac%d.data' % (frac)
FWHM_file = 'FWHM_2D_frac%d.data' % (frac)
fitsfilename = 'stacked_images_frac%d.fits' % (frac)
# Define subsets of total image data based on chosen fraction
if frac==1:
start, end = 0, 50000//3
print('Will operate on 1st subsection: %d elements' % (end-start))
elif frac==2:
start, end = 50000//3, 2*50000//3
print('Will operate on 2nd subsection: %d elements' % (end-start))
elif frac==3:
    start, end = 2*50000//3, 50000  # end the slice at 50000 so the last image is included
    print('Will operate on 3rd subsection: %d elements' % (end-start))
else:
    raise SystemExit('Subsection of 1, 2, or 3 not detected')  # stop here, otherwise start/end are undefined below
# Read in a subset of initial data and centroid coordinates
with open('initial_data.data', 'rb') as f_r:
initial_data = pickle.load(f_r)[start:end]
with open('centroid_coords.data', 'rb') as f_r:
centroid_coords = pickle.load(f_r)[start:end]
# Oversample images
def oversample(data): # pixels -> NxN pixels
oversampled = []
N = 5
c=1
for data_set in data:
Y, X = np.shape(data_set)
x = np.linspace(0, 0.5, X)
y = np.linspace(0, 0.5, Y)
f = interpolate.interp2d(x, y, data_set, kind='cubic')
Xnew = np.linspace(0, 0.5, X*N)
Ynew = np.linspace(0, 0.5, Y*N)
new_data = f(Xnew, Ynew)
oversampled.append(new_data)
if c%50==0:
print('Oversampling %f%% complete' % (c*100/len(data)))
c+=1
return np.array(oversampled) # array of 2D arrays
resampled_data = oversample(initial_data)
# Save oversampled image data -- array by array to save RAM
with open(oversampled_file, 'wb') as f:
for image in resampled_data:
pickle.dump(image, f)
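# (reading these back later takes repeated pickle.load calls on one open file
# until EOFError, since each image is dumped as a separate pickle)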
# Fit to 2D Gaussian
def gaussian_func(xy, x0, y0, sigma_x, sigma_y, amp, theta, offset): # (x0, y0) is center
x, y = xy
a = (np.cos(theta))**2/(2*sigma_x**2) + (np.sin(theta))**2/(2*sigma_y**2)
b = -np.sin(2*theta)/(4*sigma_x**2) + np.sin(2*theta)/(4*sigma_y**2)
c = (np.sin(theta))**2/(2*sigma_x**2) + (np.cos(theta))**2/(2*sigma_y**2)
inner = a * (x-x0)**2
inner += 2*b*(x-x0)*(y-y0)
inner += c * (y-y0)**2
return (offset + amp * np.exp(-inner)).ravel()
def Sigma2width(sigma):
return 2 * np.sqrt(2*np.log(2)) * sigma
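# (for a Gaussian profile, FWHM = 2*sqrt(2*ln(2))*sigma, i.e. about 2.355*sigma)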
def generate(data_set):
xvec = np.arange(0, np.shape(data_set)[1], 1)
yvec = np.arange(0, np.shape(data_set)[0], 1)
X, Y = np.meshgrid(xvec, yvec)
return X, Y
def fit_to_model(data):
gaussian_params = []
N = 5
theta_guess = 6
c=1
for i in range(len(data)):
data_set = data[i]
# Guess parameters
# Centroid and amplitude
offset = np.median(data_set)
x0, y0 = centroid_coords[i][0]*N, centroid_coords[i][1]*N
x0, y0 = int(round(x0)), int(round(y0))
if x0<500 and x0>300 and y0<500 and y0>300: #skips images that have likely inaccurate centroids
subimage = data_set[y0-80:y0+80, x0-80:x0+80]
amp = np.max(subimage)
bg_sub_amp = amp-offset
# Sigmas and offset
ylim, xlim = np.shape(subimage)
x, y = np.arange(0, xlim, 1), np.arange(0, ylim, 1)
ypix, xpix = np.where(subimage==amp)
y_range = np.take(subimage-offset, ypix[0], axis=0)
x_range = np.take(subimage-offset, xpix[0], axis=1)
half_max = bg_sub_amp/2
d_x = x_range - half_max
d_y = y_range - half_max
indices_x = np.where(d_x > 0)[0]
indices_y = np.where(d_y > 0)[0]
width_x = len(indices_x) # estimate of integer pixels only
width_y = len(indices_y)
sigma_x = width_x/(2*np.sqrt(2*np.log(2)))
sigma_y = width_y/(2*np.sqrt(2*np.log(2)))
# Guesses
guesses = [x0, y0, sigma_x, sigma_y, amp, theta_guess, offset]
# Fit to Gaussian
xx, yy = generate(data_set)
            try:
                pred_params, uncert_cov = optimize.curve_fit(gaussian_func, (xx.ravel(), yy.ravel()), data_set.ravel(), p0=guesses)
            except (optimize.OptimizeWarning, RuntimeError):
                # OptimizeWarning must be referenced through the optimize module
                # (bare OptimizeWarning was a NameError), and it is only raised as
                # an exception if escalated, e.g. via warnings.simplefilter('error', optimize.OptimizeWarning)
                print('Failed to find fit; skipping...')
                gaussian_params.append([])  # keep params aligned index-by-index with the images
                continue
gaussian_params.append(pred_params)
if c%10==0:
print('2D Gaussian fit complete for %f%% of images' % (c*100/len(data)))
c+=1
else: #not a good Gaussian fit (doesn't have centroid coords I want)
gaussian_params.append([])
return gaussian_params
params = fit_to_model(resampled_data)
# Save Gaussian params to data file
with open(params_file, 'wb') as f:
for param_set in params:
pickle.dump(param_set, f)
# Make centered cutouts of size 600x600 pixels
def cutout(data_list):
centered, FWHM_images = [], []
size = 600
c=1
for i in range(len(data_list)):
data_set = data_list[i]
        if len(params[i]) > 0:  # skip failed/rejected fits; comparing a numpy array to [] is ambiguous
if np.shape(data_set)[0] == np.shape(data_set)[1]: #eliminates images that had objects too close to the edge
x, y = params[i][0], params[i][1]
FWHM_x, FWHM_y = Sigma2width(np.abs(params[i][2])), Sigma2width(np.abs(params[i][3]))
# Pass on images that have bad fit (too big FWHMs)
if FWHM_x > 50 or FWHM_y > 50:
continue
FWHM_images.append((FWHM_x, FWHM_y))
cutout = Cutout2D(data_set, (x, y), size).data
centered.append(cutout)
if c%50==0:
print('Centered cutouts complete for %f%% of images' % (c*100/len(data_list)))
c+=1
return centered, FWHM_images
centered_cutouts, FWHM_images = cutout(resampled_data)
# Normalize
def normalize(data):
img_count = 1
norm = []
for data_set in data:
data_set *= 1/data_set.max()
norm.append(data_set)
if img_count%50==0:
print('Normalization complete for %f%% of images' % (img_count*100/len(data)))
img_count+=1
    print('NUMBER OF FINAL IMAGES:', len(norm))  # img_count has already advanced one past the last image
return np.array(norm)
final_images = normalize(centered_cutouts)
# Save final (normalized) image data and individual FWHMs before stacking
with open(final_images_file, 'wb') as f:
for image in final_images:
pickle.dump(image, f)
with open(FWHM_file, 'wb') as f:
for width in FWHM_images:
pickle.dump(width, f)
# Stack images
stacked_image = np.median(final_images, axis=0)
# Write to new FITS file
hdu = fits.PrimaryHDU(stacked_image)
hdu.writeto(fitsfilename, overwrite=True)
As you can see, my code also has several checkpoints that save new data out to binary data files. That's because if something goes wrong, I want to be able to read back the last saved data instead of running everything again, since that takes a while. I believe one third of my 50,000 images will take about 11 hours, so I ran it overnight in a screen session. But now I'm seeing another Killed message. These are the last two lines of output I saw after trying to run this code:
2D Gaussian fit complete for 5.340214% of images
Killed
Since I work on a large server, I usually have plenty of RAM available (close to 100 GB). Each initial image's numpy array typically has a shape of about 157x158, which of course becomes larger than 700x700 after oversampling.
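A back-of-the-envelope estimate of what that means for memory (my own rough numbers, assuming float64 arrays):

n_images = 50000 // 3              # one subsection
N = 5                              # oversampling factor
pixels = (157 * N) * (158 * N)     # 785 x 790 after oversampling
bytes_per_image = pixels * 8       # float64
print('%.1f GB' % (n_images * bytes_per_image / 1e9))  # about 83 GB

So just holding one subsection of oversampled images is already close to my ~100 GB, before counting initial_data, the Python list built inside oversample(), and the extra copy that np.array(oversampled) makes; I suspect that's why it gets Killed.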
I don't want to split the data into many small parts, because then I'd be sitting there all day running them one after another. Is there anything else I can do to make sure the code runs through without being killed? Also, I may really only want to save the image data from the very end of the code to a file for each subsection, since what I actually need is to take the median of all the images (from all subsections) together. Maybe generators would prove useful? I've read about them, but I'm not sure whether they can really be used here. I also have access to multiple CPUs, but I'm not sure how to use multiprocessing in Python (if that would even help).
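For instance, is a generator along these lines the right idea? Since the checkpoint files above are written one array per pickle.dump, I could at least read them back lazily instead of holding a whole list in memory (just a sketch):

import pickle

def load_arrays(filename):
    # yield one array per pickle.dump call until the end of the file
    with open(filename, 'rb') as f:
        while True:
            try:
                yield pickle.load(f)
            except EOFError:
                break

# e.g. stream a checkpoint file without keeping every image in RAM:
# for image in load_arrays(oversampled_file):
#     ...

And for multiprocessing, is this roughly the right pattern? Here process_image is a hypothetical stand-in for the whole per-image pipeline above (oversample -> fit -> cutout -> normalize for one image):

import multiprocessing as mp
import numpy as np

def process_image(args):
    image, centroid = args
    # hypothetical: oversample, fit, cut out and normalize one image here
    return image  # placeholder return just so the sketch runs

if __name__ == '__main__':
    with mp.Pool(processes=4) as pool:  # or however many CPUs are free
        finals = pool.map(process_image, list(zip(initial_data, centroid_coords)))
    stacked = np.median(np.array(finals), axis=0)

Thanks!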