Question

我有一个在 IPython 中运行的脚本，它读取一个包含 gene_names 的 .csv 输入文件，并将其中的每一行送入下面这个 for 循环：

# Preview of the main loop: every row of the gene-name CSV drives one
# iteration, and all iterations share a single multi-page PDF.
with open('C:\Users\Work\Desktop\Book1.csv', 'rU') as f:
    reader = csv.reader(f)
    with PdfPages('poopyheadjoe04.pdf') as pdf:
        for row in reader:
            # csv.reader yields each row as a list of strings.
            gene_name = row
            probe_exclusion_keyword = []
            print gene_name

然后，列表（.csv 文件）中的每个 gene_name 值都会进入 Scripts.py 中 if inference_method == "approximate_random": 这一行所在的分支：

# Main per-gene loop from the question (Scripts.py): one CSV row per
# iteration, with every iteration writing into the same PdfPages file.
# NOTE: stat_map, mask, radius, probes_reduction_method, inference_method,
# n_samples and n_burnin are not defined in this snippet; they are expected
# to exist in the surrounding script.
with open('C:\Users\Work\Desktop\Book1.csv', 'rU') as f:
    reader = csv.reader(f)
    with PdfPages('poopyheadjoe04.pdf') as pdf:
        for row in reader:
            # csv.reader yields each row as a list of strings, so gene_name
            # is a list here rather than a single string.
            gene_name = row
            probe_exclusion_keyword = []
            print gene_name

            print "Fetching probe ids for gene %s" % gene_name
            probes_dict = get_probes_from_genes(gene_name)
            print "Found %s probes: %s" % (len(probes_dict), ", ".join(probes_dict.values()))

            # probe_exclusion_keyword was just set to [], so this branch is
            # never taken as written.
            # NOTE(review): the filter reads args.probe_exclusion_keyword, not
            # the local variable tested in the condition; args is not defined
            # in this snippet.
            if probe_exclusion_keyword:
                probes_dict = {probe_id: probe_name for (probe_id, probe_name) in probes_dict.iteritems() if not args.probe_exclusion_keyword in probe_name}
                print "Probes after applying exclusion cryterion: %s" % (", ".join(probes_dict.values()))

            print "Fetching expression values for probes %s" % (", ".join(probes_dict.values()))
            expression_values, well_ids, donor_names = get_expression_values_from_probe_ids(    
            probes_dict.keys())
            print "Found data from %s wells sampled across %s donors" % (len(well_ids), len(set(donor_names)))

            print "Combining information from selected probes"
            combined_expression_values = combine_expression_values(
            expression_values, method=probes_reduction_method)

            print "Translating locations of the wells to MNI space"
            mni_coordinates = get_mni_coordinates_from_wells(well_ids)

            print "Checking values of the provided NIFTI file at well locations"
            nifti_values = get_values_at_locations(
            stat_map, mni_coordinates, mask_file=mask, radius=radius, verbose=True)

        # preparing the data frame
            # One row per well, with columns: NIFTI value, combined expression
            # value, donor ID.
            names = ["NIFTI values", "%s expression" % gene_name, "donor ID"]
            data = pd.DataFrame(np.array(
            [nifti_values, combined_expression_values, donor_names]).T, columns=names)
            # Coerce columns to numeric where possible; values that cannot be
            # converted become NaN.
            data = data.convert_objects(convert_numeric=True)
            # Drop rows containing NaN and count how many were removed.
            len_before = len(data)
            data.dropna(axis=0, inplace=True)
            nans = len_before - len(data)

            if nans > 0:
                print "%s wells fall outside of the mask" % nans

            if inference_method == "fixed":
                print "Performing fixed effect analysis"
                fixed_effects(data, ["NIFTI values", "%s expression" % gene_name])

            # NOTE: the ** ... ** around the next line appears to be bold
            # markup from the original post highlighting it; it is not valid
            # Python syntax.
            **if inference_method == "approximate_random":**
                print "Performing approximate random effect analysis"
                approximate_random_effects(
                data, ["NIFTI values", "%s expression" % gene_name], "donor ID")
                print "poopy"
                # savefig() without a figure argument saves only the currently
                # active figure, and it is called once per gene.
                pdf.savefig()
                plt.ion() #should i add ion() here?

            if inference_method == "bayesian_random":
                print "Fitting Bayesian hierarchical model"
                bayesian_random_effects(
                data, ["NIFTI values", "%s expression" % gene_name], "donor ID", n_samples, n_burnin)

        # if __name__ == '__main__':    #What exactly does this do? Start trigger for the script to run?
        # main()

该分支会调用 approximate_random_effects（位于 Analysis.py 中），它绘制两个图表：小提琴图和 lmplot：

def approximate_random_effects(data, labels, group):
    """Fit one linear regression per donor and test the slopes against zero.

    For every distinct value in ``data[group]`` the rows of that donor are
    regressed with ``labels[0]`` as x and ``labels[1]`` as y, and the first
    value returned by ``linregress`` (the slope) is stored. The mean slope
    and a one-sample t-test of the slopes against 0 are printed, then a
    violin plot of the slopes and a per-donor ``lmplot`` are drawn.

    Returns:
        Tuple ``(average_slope, t, p_val)``.
    """

    # Despite the name, this maps donor id -> regression slope.
    correlation_per_donor = {}
    for donor_id in set(data[group]):
        correlation_per_donor[donor_id], _, _, _, _ = linregress(list(data[labels[0]][data[group] == donor_id]),
                                                       list(data[labels[1]][data[group] == donor_id]))
    average_slope = np.array(correlation_per_donor.values()).mean()
    t, p_val = ttest_1samp(correlation_per_donor.values(), 0)
    print "Averaged slope across donors = %g (t=%g, p=%g)"%(average_slope, t, p_val)
    # The violin plot is drawn on the current axes.
    sns.violinplot([correlation_per_donor.values()], inner="points", names=["donors"])
    plt.ylabel("Linear regression slopes between %s and %s"%(labels[0],labels[1]))
    plt.axhline(0, color="red")

    # lmplot is a figure-level seaborn function: it creates a new figure of
    # its own, so the violin plot and the lmplot end up on separate figures
    # and the lmplot's figure becomes the current one.
    sns.lmplot(labels[0], labels[1], data, hue=group, col=group, col_wrap=3)
    # Switch pyplot to interactive mode.
    plt.ion()

    return average_slope, t, p_val

我正在尝试将所有gene_names的图表保存到pdf文件中，大致遵循“Saving multiple figures to one pdf file in matplotlib”和matplotlib.PdfPages方法。

但是，在生成的 pdf 文件中，我只得到了所有 gene_names 的 lmplot，而没有小提琴图。我该怎么做才能解决这个问题？

谢谢！非常感谢帮助！

Answer 1

看起来您的代码创建了两个图形（每个绘图各一个），但只在创建第二个图形之后调用了一次 pdf.savefig()，因此只有第二个图形被保存。

如果您希望 pdf 中每页一个图形，就需要调用 pdf.savefig() 两次：每创建一个绘图后调用一次。

我建议您稍微调整程序的结构，以便在每个绘图完成后立即将其保存到 pdf 中：

def approximate_random_effects(data, labels, group, pdf=None):
    """Fit a per-donor regression, test the slopes against zero, save both plots.

    For every distinct value in ``data[group]`` the rows of that donor are
    regressed with ``labels[0]`` as x and ``labels[1]`` as y. The mean slope
    and a one-sample t-test of the slopes against 0 are printed, and two
    pages are written to a PDF: a violin plot of the slopes and a per-donor
    ``lmplot``.

    Args:
        data: table indexable by column label (e.g. a pandas DataFrame)
            containing the columns named in ``labels`` and ``group``.
        labels: two column labels, ``[x_column, y_column]``.
        group: column label identifying the donor of each row.
        pdf: optional, already-open ``PdfPages`` object to append the two
            pages to. Pass one when calling this function repeatedly (for
            example once per gene) so that all pages end up in the same
            file; the caller stays responsible for closing it. When omitted,
            'poopyheadjoe04.pdf' is (re)created and contains only this
            call's two pages.

    Returns:
        Tuple ``(average_slope, t, p_val)``.
    """
    # Despite the name, this maps donor id -> regression slope.
    correlation_per_donor = {}
    for donor_id in set(data[group]):
        donor_rows = data[group] == donor_id
        # linregress returns (slope, intercept, r, p, stderr); keep the slope.
        correlation_per_donor[donor_id], _, _, _, _ = linregress(
            list(data[labels[0]][donor_rows]),
            list(data[labels[1]][donor_rows]))
    slopes = list(correlation_per_donor.values())
    average_slope = np.array(slopes).mean()
    t, p_val = ttest_1samp(slopes, 0)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Averaged slope across donors = %g (t=%g, p=%g)"%(average_slope, t, p_val))

    # Only open (and later close) the PDF ourselves if the caller gave none.
    owns_pdf = pdf is None
    if owns_pdf:
        pdf = PdfPages('poopyheadjoe04.pdf')
    try:
        # violinplot is axes-level: it draws on the figure created here.
        fig = plt.figure()
        sns.violinplot([slopes], inner="points", names=["donors"])
        plt.ylabel("Linear regression slopes between %s and %s"%(labels[0],labels[1]))
        plt.axhline(0, color="red")
        pdf.savefig(fig) ## Saving first figure

        # lmplot is figure-level: it builds its own figure, so save the
        # figure of the grid it returns. Calling plt.figure() first would
        # only add a blank page to the PDF.
        grid = sns.lmplot(labels[0], labels[1], data, hue=group, col=group, col_wrap=3)
        pdf.savefig(grid.fig) ## Saving second figure
    finally:
        if owns_pdf:
            pdf.close()

    return average_slope, t, p_val

然后，您需要在主程序中删除行with PdfPages('poopyheadjoe04.pdf') as pdf:，pdf.savefig()和plt.ion()。

如果您需要在同一个 pdf 页面上显示这两个图，则需要修改 violinplot 和 lmplot 的调用，使它们在同一个图形上使用不同的坐标轴（axes）。

无法将所有matlibplots保存为pdf - 缺少一些图表（IPython）

1 个答案: