Question

如果某一列的值中包含“or”条件，就需要按该条件拆分，并沿用同一行中其余列的值创建新行。我尝试过使用拆分（split）和连接（join），但无法得到所需的输出，希望能得到帮助。
输入

#include <stdio.h> 
#include <iostream>
#include <math.h>
using namespace std;
/// Computes the matched share of a character count as a percentage (integer inputs).
///
/// @param unmatched number of characters counted as not matching
/// @param charLen   total number of characters
/// @return ((charLen - unmatched) / charLen) * 100, evaluated in double precision
///
/// The difference is taken in int arithmetic before the conversion to double.
/// There is no guard for charLen == 0. The computed value is also written to
/// std::cout as a trace line before it is returned.
double calculatePercentage(int unmatched, int charLen) {
    const int matched = charLen - unmatched;
    const double ratio = static_cast<double>(matched) / static_cast<double>(charLen);
    const double pct = ratio * 100;

    // Trace output; the label text is kept exactly as it was.
    std::cout << "percenpercentageReturntage_int " << pct << std::endl;

    return pct;
}
/// Computes the matched share of a character count as a percentage (double inputs).
///
/// @param unmatched amount counted as not matching
/// @param charLen   total amount
/// @return ((charLen - unmatched) / charLen) * 100
///
/// There is no guard for charLen == 0. The computed value is also written to
/// std::cout as a trace line before it is returned.
double calculatePercentage(double unmatched, double charLen) {
    const double matched = charLen - unmatched;
    const double ratio = matched / charLen;
    const double pct = ratio * 100;

    // Trace output; the label text is kept exactly as it was.
    std::cout << "percenpercentageReturntage_double " << pct << std::endl;

    return pct;
}

/// Demo driver: calls each calculatePercentage overload once with the values
/// 4 and 50 and prints the returned percentage after a label.
int main()
{
    // Each call stays inside its own single << chain so that the position of
    // the callee's trace line relative to the label is the same as before.
    std::cout << "the integer function  value is :" << calculatePercentage(4, 50) << std::endl;

    // Double literals select the (double, double) overload.
    std::cout << "the double function  value is :" << calculatePercentage(4.0, 50.0) << std::endl;

    return 0;
}

输出

  col1   col2           col3              col4
0  x     bca or cba     value1 or null    x1 or x2
1  y     bca            value2            x1

Answer 1

使用：

# Keep the original column order; pop() below removes 'col2' from df.
cols = df.columns

# Split each 'col2' value on ' or ' into one column per alternative, then
# stack those columns into a single long Series indexed by (row, position).
col2_pieces = df.pop('col2').str.split(' or ', expand=True)
col2_long = col2_pieces.stack()

# Drop the position level so the Series is indexed by the original row label
# only, and name it 'col2' so join() creates a column with that name.
col2_long = col2_long.reset_index(level=1, drop=True).rename('col2')

# Joining on the index repeats each remaining row once per split piece.
joined = df.join(col2_long)

# Replace the now-duplicated row labels with 0..n-1 and restore column order.
df = joined.reset_index(drop=True).reindex(columns=cols)
print(df)
  col1 col2    col3
0    x  bca  value1
1    x  cba  value1
2    y  bca  value2

说明：

首先用 pop 提取 col2 列，再用 split（expand=True）把它拆分成 DataFrame
用 stack 重塑为 Series
reset_index(level=1, drop=True) 用于删除 stack 产生的 MultiIndex 内层索引
用 rename 给 Series 设置新的列名
然后 join 到原始 df
需要再调用 reset_index(drop=True) 以得到唯一索引
最后用 reindex 恢复原来的列顺序

编辑：

# Move 'col1' into the index and stack every other column into one long
# Series indexed by (col1, column name).
# NOTE(review): .str.split assumes the remaining columns hold strings — confirm.
long_values = df.set_index('col1').stack()

# Split each value on ' or ' and stack the pieces, which adds a third index
# level holding the position of the piece within the original value.
pieces = long_values.str.split(' or ', expand=True).stack()

# Turn the column-name level (level 1) back into columns: one row per
# (col1, piece position) pair.
wide = pieces.unstack(1)

# Drop the piece-position level, then bring 'col1' back as a regular column.
df = wide.reset_index(level=1, drop=True).reset_index()
print(df)

  col1 col2    col3 col4
0    x  bca  value1   x1
1    x  cba    null   x2
2    y  bca  value2   x1

在数据框中添加新行，并按条件拆分前一行

1 个答案: