我有一个数据框
def get_records(**kwargs):
    """Turn the rows read by the matching MySQL task into a JSON payload.

    Pulls the XCom value of task ``mysql_read_data_<custom_cur_size>``,
    wraps every ``(x, y)`` pair as ``{"attributes": {"att1": x, "att2": y}}``,
    serialises the resulting list with ``json.dumps`` and pushes the string
    under the XCom key ``data_to_cus_<custom_cur_size>``.

    Args:
        **kwargs: must contain ``ti`` (an object exposing ``xcom_pull`` and
            ``xcom_push``) and ``custom_cur_size`` (the batch identifier used
            as the suffix of the task id and of the XCom key).

    Returns:
        The JSON string that was pushed to XCom.
    """
    import logging

    ti = kwargs['ti']
    batch = kwargs['custom_cur_size']
    source_task_id = 'mysql_read_data_{}'.format(batch)

    rows = ti.xcom_pull(task_ids=source_task_id)
    if rows is None:
        # Nothing was pulled for this batch; iterating None would raise an
        # opaque TypeError, so treat it as an empty batch and say so loudly.
        logging.getLogger(__name__).warning(
            "No XCom value found for task %s; emitting an empty payload",
            source_task_id,
        )
        rows = []

    data = json.dumps(
        [{"attributes": {"att1": x, "att2": y}} for x, y in rows]
    )
    ti.xcom_push(key='data_to_cus_{}'.format(batch), value=data)
    return data
# Build one read -> transform -> send chain per batch offset, repeating
# until the offset reaches `limit`.
# NOTE(review): every read is given params limit=100 while cur_size advances
# by 1000 per iteration; if the SQL template uses these as LIMIT/OFFSET, rows
# between the two would never be read -- confirm this is intentional.
while cur_size < limit:
    mysql_read_data = MySQLReadOperator(
        task_id='mysql_read_data_{}'.format(cur_size),
        mysql_conn_id='temper_flat_old',
        sql="sql/client_data.sql",
        params={'limit': 100, 'offset': cur_size},
        trigger_rule="all_done",
        dag=dag,
    )

    # Reshapes the rows pulled from the read task and pushes them back to
    # XCom under 'data_to_cus_<offset>'.
    python_task = PythonOperator(
        task_id='python_func_task_{}'.format(cur_size),
        python_callable=get_records,
        op_kwargs={'custom_cur_size': str(cur_size)},
        provide_context=True,
        trigger_rule="all_done",
        dag=dag,
    )

    # Told which task and XCom key hold the payload for this batch.
    send_to_endpoint = CustomOperator(
        task_id='custom_op_{}'.format(cur_size),
        data_xcom_task_id=python_task.task_id,
        data_xcom_key='data_to_cus_{}'.format(cur_size),
        trigger_rule="all_done",
        dag=dag,
    )

    mysql_read_data >> python_task >> send_to_endpoint
    cur_size += 1000
我想创建一个索引,将所有列除以各自所属的第 0 级(即 0 或 1)下 A 列的总和
如此
0 1
A B A B
1 4 6 5 6
2 4 8 15 3
3 8 10 10 2
所以输出是
0=16
1=30
到目前为止我有
0 1
A B A B
1 4/16 6/16 5/30 6/30
2 4/16 8/16 15/30 3/30
3 8/16 10/16 10/30 2/30
但是它不起作用
答案 0 :(得分:2)
使用 DataFrame.xs 选择第 1 级中的 A 列并求和(sum),然后将结果传递给 DataFrame.div,并指定 level=0:
# Sum column 'A' inside each level-0 group, then divide every column by the
# total belonging to its own level-0 group.
a_totals = df.xs('A', axis=1, level=1).sum()
df = df.div(a_totals, axis=1, level=0)
print(df)
0 1
A B A B
1 0.25 0.375 0.166667 0.200000
2 0.25 0.500 0.500000 0.100000
3 0.50 0.625 0.333333 0.066667