为什么stack_data()返回一个空数组?

时间:2014-08-18 06:42:00

标签: python

我定义了以下几个函数。出于某种原因,stack_data()总是返回一个空数组,我无法弄清楚原因。有没有人有任何建议?

也欢迎提出关于改进代码风格、格式、可读性等方面的一般性建议;通用的调试技巧同样非常有帮助。

预期行为的示例。输入:
print(stack_data(np.array([[1,1,1,2,2,2,3,3,3],[4,4,4,5,5,5,6,6,6],[7,7,7,8,8,8,9,9,9]]),0.33))

输出: [4,1,4,2,2,3,4,4,4.5,7,7,7.5,9,9]

def _fullsweep_ranges(spec_data):
    start = [x for x in range(0,len(spec_data[:,1])) \
             if spec_data[x,1] == spec_data[:,1].min()]
    stop = [x for x in range(0,len(spec_data[:,1])) \
             if spec_data[x,1] == spec_data[:,1].max()]
    return zip(start,stop)

def _remove_partial_fullsweeps(spec_data):
    ranges = _fullsweep_ranges(spec_data)
    first_min_index = ranges[0][0]
    last_max_index = ranges[-1][1]
    return spec_data[first_min_index:last_max_index+1,:]

def _flatten_data(spec_data):
    row = 0
    flat_data = []
    running = False
    while (row < np.shape(spec_data)[0] - 1):
        if not(running):        
            start = row
        running = True
        if spec_data[row,1] != spec_data[row+1,1]:
            stop = row
            running = False
            time = np.mean(spec_data[start:stop,0], axis=0)
            start_freq = spec_data[start,1]
            freq_step = np.mean(spec_data[start:stop,2], axis=0)
            bin_size = spec_data[0,3] * (stop - start)
            avg_subspectra = np.mean(spec_data[start:stop,4:], axis=0)
            data_row = [time, start_freq, freq_step, bin_size, avg_subspectra]
            flat_data.append(data_row)
        row += 1
    return np.array(flat_data)

def _split_row(row, num_overlap):
    return row[:num_overlap], row[num_overlap:-num_overlap], row[-num_overlap:]

def stack_data(spec_data, percent_overlap):
    """Stack the subspectra of every fullsweep into one row per sweep.

    The data is first trimmed to complete fullsweeps and flattened so that
    each subspectrum occupies one row. Within a sweep, the trailing
    overlap region of each subspectrum is averaged element-wise with the
    leading overlap region of the next one.

    Args:
        spec_data: 2-D numpy array of spectrum data; columns 0-2 are used
            as time, start frequency and frequency step, and columns 4
            onward as the subspectrum samples.
        percent_overlap: fraction of a subspectrum that overlaps with its
            neighbour; the overlap length is rounded up to whole samples.

    Returns:
        A 2-D numpy array where each row is a fullsweep with overlapping
        regions averaged. The first column is the center time of the
        fullsweep, the second is its start frequency and the third is the
        frequency step; the stacked samples follow. An empty array is
        returned when no data remains after trimming and flattening.
    """
    spec_data = _remove_partial_fullsweeps(spec_data)
    spec_data = _flatten_data(spec_data)
    output = []
    if np.size(spec_data) == 0:
        return np.array(output)
    # int() keeps the value usable as a slice bound on Python 2, where
    # math.ceil returns a float.
    num_overlap = int(math.ceil(len(spec_data[0, 4:]) * percent_overlap))
    for start, stop in _fullsweep_ranges(spec_data):
        sweep = spec_data[start:stop + 1]
        center_time = np.mean(sweep[:, 0], axis=0)
        start_freq = sweep[0, 1]
        freq_step = np.mean(sweep[:, 2], axis=0)
        output_row = [center_time, start_freq, freq_step]
        # Split only the sample columns; the overlap length is derived
        # from columns 4 onward, so the metadata columns must be excluded.
        split_data = [_split_row(row[4:], num_overlap) for row in sweep]
        last = len(split_data) - 1
        # enumerate yields (index, item) pairs, so the 3-tuple has to be
        # unpacked in a nested pattern.
        for i, (beg, mid, end) in enumerate(split_data):
            if i == 0:
                output_row.extend(beg)
            output_row.extend(mid)
            if i == last:
                output_row.extend(end)
            else:
                next_beg = split_data[i + 1][0]
                output_row.extend(np.mean([end, next_beg], axis=0))
        output.append(output_row)
    return np.array(output)

1 个答案:

答案 0 :(得分:1)

错误出在 _flatten_data() 的返回语句:

return np.array(flat_data)

因为您发布的示例中的flat_data是:

[[nan, 1, nan, 0, array([ nan,  nan,  nan,  nan,  nan])], [nan, 4, nan, 0, array([ nan,  nan,  nan,  nan,  nan])]]

这并不是一个规则多维数组的表示:每一行的最后一个元素本身又是一个数组,而且各个均值都是 nan。