df = 

contig  pos ref haplotype_block hap_X   hap_Y   odds_ratio  My_hap  Sp_hap
2   5207    T   1856    T   A   167.922 T   A
2   5238    G   1856    C   G   -   C   G
2   5723    A   1856    A   T   -   A   T
2   5867    C   1856    T   C   -   T   C
2   155667  G   2816    G   *   1.0 N   N
2   155670  T   2816    T   *   -   N   N
2   67910   C   2   C   T   0.21600000000000003 T   C
2   67941   A   2   A   T   -   T   A
2   68016   A   2   A   G   -   G   A
2   118146  C   132 T   C   1369.0  T   C
2   118237  A   132 C   A   -   C   A
2   118938  A   1157    T   A   0.002   A   T

# Sort the frame in place on contig first, then pos, in descending order.
# Per the df.info() listing further down, `pos` is object dtype, so its
# values are presumably strings and are compared character by character
# rather than numerically.
# NOTE(review): the output pasted below is in ascending order although
# ascending=False is passed here -- confirm which call produced it.
sort_keys = ['contig', 'pos']
df.sort_values(sort_keys, ascending=False, inplace=True)

print(df)  # produces the listing shown below

contig  pos ref haplotype_block hap_X   hap_Y   odds_ratio  My_hap  Sp_hap
2   118146  C   132 T   C   1369.0  T   C
2   118237  A   132 C   A   -   C   A
2   118938  A   1157    T   A   0.002   A   T
2   155667  G   2816    G   *   1.0 N   N
2   155670  T   2816    T   *   -   N   N
2   5207    T   1856    T   A   167.922 T   A
2   5238    G   1856    C   G   -   C   G
2   5723    A   1856    A   T   -   A   T
2   5867    C   1856    T   C   -   T   C

因此,它似乎只按这两列(contig 和 pos)中数值的第一位数字对数据进行排序。为什么会这样?有没有一个非常简单且节省内存的解决方法?




<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 9 columns):
contig             333 non-null int64
pos                333 non-null object
ref                333 non-null object
haplotype_block    333 non-null int64
hap_X              333 non-null object
hap_Y              333 non-null object
odds_ratio         333 non-null object
My_hap             333 non-null object
Sp_hap             333 non-null object
dtypes: int64(2), object(7)
memory usage: 23.5+ KB

# `pos` is object dtype (see the df.info() listing), so sorting on it compares
# the values as strings, character by character ('118146' sorts before
# '5207').  Convert it to a real integer column so the sort is numeric.
# 'int64' is spelled out because plain `int` maps to a platform-dependent
# width on some NumPy builds, and the other integer columns are int64.
# `contig` is already int64 according to df.info(), so it is not re-cast:
# doing so would only allocate another copy of the column.
df['pos'] = df['pos'].astype('int64')

# Sort in place by contig, then numerically by position within each contig.
df.sort_values(by=['contig', 'pos'], inplace=True, ascending=True)
