如果某一列的值中包含“or”条件,就需要按“or”把该值拆分开并创建新行,新行的其余列沿用数据帧(DataFrame)中原来那一行的值。我尝试过使用 split 和 join,但无法得到所需的输出。如能提供任何帮助,不胜感激。
输入
#include <stdio.h>
#include <iostream>
#include <math.h>
using namespace std;
// Integer overload: returns (charLen - unmatched) / charLen * 100 as a double.
// The subtraction is performed on the ints; both operands are then widened to
// double so the division is not truncated.
// Side effect: writes a trace line with the computed value to standard output.
double calculatePercentage(int unmatched, int charLen) {
    const double remaining = static_cast<double>(charLen - unmatched);
    const double total = static_cast<double>(charLen);
    // Divide first, then scale, so the rounding matches "(a / b) * 100".
    const double percentage = remaining / total * 100;
    std::cout << "percenpercentageReturntage_int " << percentage << std::endl;
    return percentage;
}
// Double overload: returns (charLen - unmatched) / charLen * 100.
// Side effect: writes a trace line with the computed value to standard output.
double calculatePercentage(double unmatched, double charLen) {
    const double remaining = charLen - unmatched;
    // Divide first, then scale, so the rounding matches "(a / b) * 100".
    const double percentage = remaining / charLen * 100;
    std::cout << "percenpercentageReturntage_double " << percentage << std::endl;
    return percentage;
}
// Calls both calculatePercentage overloads with the same values (4 of 50) and
// prints each result.
int main()
{
    // The calls are kept inside the stream expressions on purpose: each
    // overload prints its own trace line, so moving a call into a local
    // variable could change the order of the program's output.
    std::cout << "the integer function value is :"
              << calculatePercentage(4, 50)
              << std::endl;

    // Double literals select the (double, double) overload.
    std::cout << "the double function value is :"
              << calculatePercentage(4.0, 50.0)
              << std::endl;

    return 0;
}
输出
col1 col2 col3 col4
0 x bca or cba value1 or null x1 or x2
1 y bca value2 x1
答案 0 :(得分:1)
使用:
# Turn the ' or '-separated alternatives in col2 into one row per alternative,
# repeating the remaining columns of the source row.
cols = df.columns  # remember the column order; pop() below removes col2

# pop() takes col2 out of df in place, so the later join cannot clash with it.
col2_pieces = df.pop('col2').str.split(' or ', expand=True)

# stack() folds the piece columns into an inner index level; dropping that
# level leaves a Series whose index repeats the original row label.
col2_long = col2_pieces.stack().reset_index(level=1, drop=True).rename('col2')

# Joining on the index duplicates each remaining row once per alternative.
df = df.join(col2_long)

# Build a fresh unique index and put the columns back in their original order.
df = df.reset_index(drop=True).reindex(columns=cols)
print(df)
col1 col2 col3
0 x bca value1
1 x cba value1
2 y bca value2
说明:
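1. 用 pop 取出 col2 列(该列同时会从 df 中移除);
2. 用 split 并设置 expand=True,把该列拆分成一个 DataFrame;
3. 用 stack 进行重塑;
4. 用 reset_index(level=1, drop=True) 删除 stack 产生的那一级 MultiIndex;
5. 用 rename 把得到的 Series 重命名为新的列名;
6. 用 join 连接回原始 df;
7. 用 reset_index(drop=True) 重新生成唯一索引(这一步是必需的);
8. 用 reindex 保持与原来相同的列名顺序。

编辑: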
# Split the ' or '-separated alternatives in every column except col1 into
# separate rows, pairing the alternatives of different columns by position.

# Long form: one string per (col1 value, column name) pair.
long_values = df.set_index('col1').stack()

# One column per alternative position.
alternatives = long_values.str.split(' or ', expand=True)

# After the second stack() the index is (col1, column name, position);
# unstack(1) moves the column names back out to the columns.
per_position = alternatives.stack().unstack(1)

# Drop the position level, then turn col1 back into a regular column.
df = per_position.reset_index(level=1, drop=True).reset_index()
print(df)
col1 col2 col3 col4
0 x bca value1 x1
1 x cba null x2
2 y bca value2 x1