我正在使用python,pandas和numpy来读取一些数据。
我有两个数据框:
输入1 - 成本矩阵(包含每个区域在每个季节的成本):index = regions,columns = seasons
输入2 - 二进制矩阵(当月份“a”属于季节“b”时,对应的值为1):index = seasons,columns = months
我想要的输出是一个矩阵C,它包含每个区域在每个月份的成本:index = regions,columns = months。
有人可以帮助我吗?我google了很多,但我找不到解决方案。
使用我的代码进行更新
import pandas as pd
import numpy as np
from xlwings import Workbook, Range
import os
# Build the path of the workbook from the current working directory and
# print both values so the location being opened can be checked.
print(os.getcwd())
link = (os.getcwd() + '/test.xlsx')
print(link)
#Open the Workbook
# NOTE(review): the Range(...) calls below are not given a workbook, so they
# presumably read from the workbook opened here - confirm against the
# installed xlwings version.
wb = Workbook(link)
#
#Reading data
# Each label list is read from a named range on 'Sheet1' and wrapped in a
# NumPy array; the comment under each line shows the values it contained.
regions=np.array(Range('Sheet1','regions').value)
#[u'Region A' u'Region B' u'Region C' u'Region D']
seasons=np.array(Range('Sheet1','seasons').value)
#[u'Season A' u'Season B' u'Season C' u'Season D']
months=np.array(Range('Sheet1','months').value)
#[u'Jan' u'Feb' u'Mar' u'Apr' u'May' u'Jun' u'Jul' u'Aug']
#read relationship between season and month
data=Range('Sheet1','rel').table.value
# NOTE(review): the rows are labelled with `regions` here, although this
# table is described as a season/month relationship - the index was probably
# meant to be `seasons`; confirm before relying on the row labels.
relationship=pd.DataFrame(data[0:], index = regions, columns=months)
# Jan Feb Mar Apr May Jun Jul Aug
#Region A 1 1 0 0 0 0 0 0
#Region B 0 0 1 1 0 0 0 0
#Region C 0 0 0 0 1 1 0 0
#Region D 0 0 0 0 0 0 1 1
#read the cost per region
# `data` is reused: it now holds the cost table (rows = regions,
# columns = seasons).
data=Range('Sheet1','cost').table.value
cost=pd.DataFrame(data[0:], index = regions, columns=seasons)
# Season A Season B Season C Season D
#Region A 1 9 7 2
#Region B 7 0 3 3
#Region C 4 0 7 5
#Region D 3 10 3 10
#What I want:
# Desired result: one row per region, one column per month, each cell holding
# the region's cost for the season that the month belongs to.
# Jan Feb Mar Apr May Jun Jul Aug
#Region A 1 1 9 9 7 7 2 2
#Region B 7 7 0 0 3 3 3 3
#Region C 4 4 0 0 7 7 5 5
#Region D 3 3 10 10 3 3 10 10
答案 0 :(得分:0)
我认为你的例子中的关系数据框存在错误:你明确说明它应该表示季节(而不是地区)与月份之间的关系,所以我在下面的代码中相应地做了修改。
import pandas as pd
import numpy as np
# Row/column labels shared by the input frames and the result.
regions = ['Region A', 'Region B', 'Region C', 'Region D']
seasons = ['Season A', 'Season B', 'Season C', 'Season D']
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug']

# Cost matrix: one row per region, one column per season.
cost_data = np.array([
    [1, 9, 7, 2],
    [7, 0, 3, 3],
    [4, 0, 7, 5],
    [3, 10, 3, 10],
])
cost = pd.DataFrame(data=cost_data, index=regions, columns=seasons)

# Binary membership matrix: rel.loc[season, month] is 1 when the month
# belongs to the season and 0 otherwise.
rel_data = np.array([
    [1, 1, 0, 0, 0, 0, 0, 0],
    [0, 0, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 0, 1, 1, 0, 0],
    [0, 0, 0, 0, 0, 0, 1, 1],
])
rel = pd.DataFrame(data=rel_data, index=seasons, columns=months)

# (regions x seasons) . (seasons x months) -> (regions x months).
# Because every month column of `rel` contains exactly one 1, the matrix
# product simply picks, for each month, the cost of the season it belongs to.
# A month assigned to no season would yield 0, and a month assigned to
# several seasons would yield the sum of their costs.
# DataFrame.dot aligns the columns of `cost` with the index of `rel` by
# label, so both must carry the same season labels.
c = cost.dot(rel)

# Parenthesised form works as a statement on Python 2 and a call on Python 3.
print(c)
# Jan Feb Mar Apr May Jun Jul Aug
#Region A 1 1 9 9 7 7 2 2
#Region B 7 7 0 0 3 3 3 3
#Region C 4 4 0 0 7 7 5 5
#Region D 3 3 10 10 3 3 10 10