合并具有多个索引和列值的数据帧

时间:2021-07-16 10:01:17

标签: python pandas merge multi-index

我有两个具有多个索引和日期作为列的数据框:

df1

df1 = pd.DataFrame.from_dict({('group', ''): {0: 'A',
  1: 'A',
  2: 'A',
  3: 'A',
  4: 'A',
  5: 'A',
  6: 'A',
  7: 'A',
  8: 'B',
  9: 'B',
  10: 'B',
  11: 'B',
  12: 'B',
  13: 'B',
  14: 'B',
  15: 'B',
  16: 'C',
  17: 'C',
  18: 'C',
  19: 'C',
  20: 'C',
  21: 'C',
  22: 'C',
  23: 'C',
  24: 'D',
  25: 'D',
  26: 'D',
  27: 'D',
  28: 'D',
  29: 'D',
  30: 'D'},
 ('category', ''): {0: 'Apple',
  1: 'Amazon',
  2: 'Google',
  3: 'Netflix',
  4: 'Facebook',
  5: 'Uber',
  6: 'Tesla',
  7: 'total',
  8: 'Apple',
  9: 'Amazon',
  10: 'Google',
  11: 'Netflix',
  12: 'Facebook',
  13: 'Uber',
  14: 'Tesla',
  15: 'total',
  16: 'Apple',
  17: 'Amazon',
  18: 'Google',
  19: 'Netflix',
  20: 'Facebook',
  21: 'Uber',
  22: 'Tesla',
  23: 'total',
  24: 'Apple',
  25: 'Amazon',
  26: 'Google',
  27: 'Netflix',
  28: 'Uber',
  29: 'Tesla',
  30: 'total'},
 (pd.Timestamp('2021-06-28 00:00:00'), 'total_orders'): {0: 88.0,
  1: 66.0,
  2: 191.0,
  3: 558.0,
  4: 12.0,
  5: 4.0,
  6: 56.0,
  7: 975.0,
  8: 90.0,
  9: 26.0,
  10: 49.0,
  11: 250.0,
  12: 7.0,
  13: 2.0,
  14: 44.0,
  15: 468.0,
  16: 36.0,
  17: 52.0,
  18: 94.0,
  19: 750.0,
  20: 10.0,
  21: 0.0,
  22: 52.0,
  23: 994.0,
  24: 16.0,
  25: 22.0,
  26: 5.0,
  27: 57.0,
  28: 3.0,
  29: 33.0,
  30: 136.0},
 (pd.Timestamp('2021-06-28 00:00:00'), 'total_sales'): {0: 4603.209999999999,
  1: 2485.059999999998,
  2: 4919.39999999998,
  3: 6097.77,
  4: 31.22,
  5: 155.71,
  6: 3484.99,
  7: 17237.35999999996,
  8: 561.54,
  9: 698.75,
  10: 1290.13,
  11: 4292.68000000001,
  12: 947.65,
  13: 329.0,
  14: 2889.65,
  15: 9989.4,
  16: 330.8899999999994,
  17: 2076.26,
  18: 2982.270000000004,
  19: 11978.62000000002,
  20: 683.0,
  21: 0.0,
  22: 3812.16999999999,
  23: 20963.21000000002,
  24: 234.4900000000002,
  25: 896.1,
  26: 231.0,
  27: 893.810000000001,
  28: 129.0,
  29: 1712.329999999998,
  30: 4106.729999999996},
 (pd.Timestamp('2021-07-05 00:00:00'), 'total_orders'): {0: 109.0,
  1: 48.0,
  2: 174.0,
  3: 592.0,
  4: 13.0,
  5: 5.0,
  6: 43.0,
  7: 984.0,
  8: 62.0,
  9: 13.0,
  10: 37.0,
  11: 196.0,
  12: 8.0,
  13: 1.0,
  14: 3.0,
  15: 30.0,
  16: 76.0,
  17: 5.0,
  18: 147.0,
  19: 88.0,
  20: 8.0,
  21: 1.0,
  22: 78.0,
  23: 1248.0,
  24: 1.0,
  25: 18.0,
  26: 23.0,
  27: 83.0,
  28: 0.0,
  29: 29.0,
  30: 154.0},
 (pd.Timestamp('2021-07-05 00:00:00'), 'total_sales'): {0: 3453.02,
  1: 17868.730000000003,
  2: 44707.82999999999,
  3: 61425.99,
  4: 1261.0,
  5: 1914.6000000000001,
  6: 24146.09,
  7: 154777.25999999998,
  8: 6201.489999999999,
  9: 5513.960000000001,
  10: 9645.87,
  11: 25086.785,
  12: 663.0,
  13: 448.61,
  14: 26332.7,
  15: 73892.415,
  16: 556.749999999999,
  17: 1746.859999999997,
  18: 4103.219999999994,
  19: 15571.52000000008,
  20: 86.0,
  21: 69.0,
  22: 5882.759999999995,
  23: 26476.11000000004,
  24: 53.0,
  25: 801.220000000001,
  26: 684.56,
  27: 1232.600000000002,
  28: 0.0,
  29: 15902.1,
  30: 43943.48},
 (pd.Timestamp('2021-07-12 00:00:00'), 'total_orders'): {0: 32.0,
  1: 15.0,
  2: 89.0,
  3: 239.0,
  4: 2.0,
  5: 3.0,
  6: 20.0,
  7: 400.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0,
  16: 21.0,
  17: 14.0,
  18: 58.0,
  19: 281.0,
  20: 3.0,
  21: 3.0,
  22: 33.0,
  23: 413.0,
  24: 7.0,
  25: 6.0,
  26: 4.0,
  27: 13.0,
  28: 0.0,
  29: 18.0,
  30: 48.0},
 (pd.Timestamp('2021-07-12 00:00:00'), 'total_sales'): {0: 2147.7000000000003,
  1: 4767.3,
  2: 2399.300000000003,
  3: 3137.440000000002,
  4: 178.0,
  5: 866.61,
  6: 10639.03,
  7: 73235.38,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 0.0,
  13: 0.0,
  14: 0.0,
  15: 0.0,
  16: 220.94,
  17: 727.5199999999995,
  18: 2500.96999999999,
  19: 4414.00999999998,
  20: 15.0,
  21: 196.71,
  22: 2170.1,
  23: 9745.24999999997,
  24: 126.55,
  25: 290.2,
  26: 146.01,
  27: 233.0,
  28: 0.0,
  29: 973.18,
  30: 1658.940000000002}}).set_index(['group','category'])

df2

df2 = pd.DataFrame.from_dict({'group': {0: 'total_full',
  1: 'total_full',
  2: 'A',
  3: 'A',
  4: 'B',
  5: 'B',
  6: 'C',
  7: 'C',
  8: 'D',
  9: 'D',
  10: 'Apple_total',
  11: 'Apple_total',
  12: 'A',
  13: 'A',
  14: 'B',
  15: 'B',
  16: 'C',
  17: 'C',
  18: 'D',
  19: 'D',
  20: 'Amazon_total',
  21: 'Amazon_total',
  22: 'A',
  23: 'A',
  24: 'B',
  25: 'B',
  26: 'C',
  27: 'C',
  28: 'D',
  29: 'D',
  30: 'Google_total',
  31: 'Google_total',
  32: 'A',
  33: 'A',
  34: 'B',
  35: 'B',
  36: 'C',
  37: 'C',
  38: 'D',
  39: 'D',
  40: 'Facebook_total',
  41: 'Facebook_total',
  42: 'A',
  43: 'A',
  44: 'B',
  45: 'B',
  46: 'C',
  47: 'C',
  48: 'D',
  49: 'D',
  50: 'Netflix_total',
  51: 'Netflix_total',
  52: 'A',
  53: 'A',
  54: 'B',
  55: 'B',
  56: 'C',
  57: 'C',
  58: 'D',
  59: 'D',
  60: 'Tesla_total',
  61: 'Tesla_total',
  62: 'A',
  63: 'A',
  64: 'B',
  65: 'B',
  66: 'C',
  67: 'C',
  68: 'D',
  69: 'D',
  70: 'Uber_total',
  71: 'Uber_total',
  72: 'A',
  73: 'A',
  74: 'B',
  75: 'B',
  76: 'C',
  77: 'C',
  78: 'D',
  79: 'D'},
 'category': {0: 'total_full',
  1: 'total_full',
  2: 'group_total',
  3: 'group_total',
  4: 'group_total',
  5: 'group_total',
  6: 'group_total',
  7: 'group_total',
  8: 'group_total',
  9: 'group_total',
  10: 'Apple_total',
  11: 'Apple_total',
  12: 'Apple',
  13: 'Apple',
  14: 'Apple',
  15: 'Apple',
  16: 'Apple',
  17: 'Apple',
  18: 'Apple',
  19: 'Apple',
  20: 'Amazon_total',
  21: 'Amazon_total',
  22: 'Amazon',
  23: 'Amazon',
  24: 'Amazon',
  25: 'Amazon',
  26: 'Amazon',
  27: 'Amazon',
  28: 'Amazon',
  29: 'Amazon',
  30: 'Google_total',
  31: 'Google_total',
  32: 'Google',
  33: 'Google',
  34: 'Google',
  35: 'Google',
  36: 'Google',
  37: 'Google',
  38: 'Google',
  39: 'Google',
  40: 'Facebook_total',
  41: 'Facebook_total',
  42: 'Facebook',
  43: 'Facebook',
  44: 'Facebook',
  45: 'Facebook',
  46: 'Facebook',
  47: 'Facebook',
  48: 'Facebook',
  49: 'Facebook',
  50: 'Netflix_total',
  51: 'Netflix_total',
  52: 'Netflix',
  53: 'Netflix',
  54: 'Netflix',
  55: 'Netflix',
  56: 'Netflix',
  57: 'Netflix',
  58: 'Netflix',
  59: 'Netflix',
  60: 'Tesla_total',
  61: 'Tesla_total',
  62: 'Tesla',
  63: 'Tesla',
  64: 'Tesla',
  65: 'Tesla',
  66: 'Tesla',
  67: 'Tesla',
  68: 'Tesla',
  69: 'Tesla',
  70: 'Uber_total',
  71: 'Uber_total',
  72: 'Uber',
  73: 'Uber',
  74: 'Uber',
  75: 'Uber',
  76: 'Uber',
  77: 'Uber',
  78: 'Uber',
  79: 'Uber'},
 'type': {0: 'Sales_1',
  1: 'Sales_2',
  2: 'Sales_1',
  3: 'Sales_2',
  4: 'Sales_1',
  5: 'Sales_2',
  6: 'Sales_1',
  7: 'Sales_2',
  8: 'Sales_1',
  9: 'Sales_2',
  10: 'Sales_1',
  11: 'Sales_2',
  12: 'Sales_1',
  13: 'Sales_2',
  14: 'Sales_1',
  15: 'Sales_2',
  16: 'Sales_1',
  17: 'Sales_2',
  18: 'Sales_1',
  19: 'Sales_2',
  20: 'Sales_1',
  21: 'Sales_2',
  22: 'Sales_1',
  23: 'Sales_2',
  24: 'Sales_1',
  25: 'Sales_2',
  26: 'Sales_1',
  27: 'Sales_2',
  28: 'Sales_1',
  29: 'Sales_2',
  30: 'Sales_1',
  31: 'Sales_2',
  32: 'Sales_1',
  33: 'Sales_2',
  34: 'Sales_1',
  35: 'Sales_2',
  36: 'Sales_1',
  37: 'Sales_2',
  38: 'Sales_1',
  39: 'Sales_2',
  40: 'Sales_1',
  41: 'Sales_2',
  42: 'Sales_1',
  43: 'Sales_2',
  44: 'Sales_1',
  45: 'Sales_2',
  46: 'Sales_1',
  47: 'Sales_2',
  48: 'Sales_1',
  49: 'Sales_2',
  50: 'Sales_1',
  51: 'Sales_2',
  52: 'Sales_1',
  53: 'Sales_2',
  54: 'Sales_1',
  55: 'Sales_2',
  56: 'Sales_1',
  57: 'Sales_2',
  58: 'Sales_1',
  59: 'Sales_2',
  60: 'Sales_1',
  61: 'Sales_2',
  62: 'Sales_1',
  63: 'Sales_2',
  64: 'Sales_1',
  65: 'Sales_2',
  66: 'Sales_1',
  67: 'Sales_2',
  68: 'Sales_1',
  69: 'Sales_2',
  70: 'Sales_1',
  71: 'Sales_2',
  72: 'Sales_1',
  73: 'Sales_2',
  74: 'Sales_1',
  75: 'Sales_2',
  76: 'Sales_1',
  77: 'Sales_2',
  78: 'Sales_1',
  79: 'Sales_2'},
 '2021-06-28': {0: 67.5277641202152,
  1: 82.7854700135998,
  2: 21.50082266792856,
  3: 22.03644997199996,
  4: 64.460440147,
  5: 10.1060499896,
  6: 65.1530371974946,
  7: 50.6429700519999,
  8: 56.413464107792045,
  9: 0,
  10: 17.48074540313092,
  11: 26.8376199976,
  12: 52.172,
  13: 61.16600000040001,
  14: 20.9447844,
  15: 40.69122000000001,
  16: 83.55718929717925,
  17: 14.98039999719995,
  18: 20.806771705951697,
  19: np.nan,
  20: 18.3766353690825,
  21: 12.82565001479992,
  22: 52.425508769690694,
  23: 25.661999978399994,
  24: 17.88071596,
  25: 24.384659998799997,
  26: 91.10086982794643,
  27: 12.77899003759993,
  28: 16.969540811445366,
  29: np.nan,
  30: 18.8795397517309,
  31: 26.73017999840005,
  32: 53.52039700062155,
  33: 58.81199999639999,
  34: 12.1243325,
  35: 24.0544100028,
  36: 55.94068246571674,
  37: 133.86376999920006,
  38: 7.294127785392621,
  39: np.nan,
  40: 6.07807089184563,
  41: 7.27483001599998,
  42: 2.300470581874837,
  43: 30.71300000639998,
  44: 5.810764652,
  45: 12.333119997600003,
  46: 25.475930745418292,
  47: 64.228710012,
  48: 9.490904912552498,
  49: np.nan,
  50: 8.184780211399392,
  51: 24.59321999400001,
  52: 6.807138946302334,
  53: 12.0879999972,
  54: 0.869207661,
  55: 0.324,
  56: 0.5084336040970575,
  57: 12.181219996800007,
  58: 0,
  59: np.nan,
  60: 9.293956915067886,
  61: 11.171379993599999,
  62: 6.384936971649232,
  63: 3.657999996,
  64: 0.913782413,
  65: 1.9992000012000002,
  66: 1.5322078073061867,
  67: 5.514179996399999,
  68: 0.4630297231124678,
  69: np.nan,
  70: 36.23403557795798,
  71: 53.35258999919999,
  72: 21.890370397789923,
  73: 9.937449997200002,
  74: 5.916852561,
  75: 6.319439989199998,
  76: 7.03772344983066,
  77: 37.095700012799995,
  78: 1.3890891693374032,
  79: np.nan},
 '2021-07-05': {0: 65.4690491915759,
  1: 98.5235100112003,
  2: 21.4573181155924,
  3: 241.06741999679997,
  4: 67.481716829,
  5: 11.60325000040002,
  6: 27.5807099999998,
  7: 65.8528400140003,
  8: 58.949304246983736,
  9: 0.0,
  10: 185.65887577993723,
  11: 318.9965699964001,
  12: 54.517,
  13: 66.55265999039996,
  14: 21.92632044,
  15: 43.67116000320002,
  16: 87.47349898707688,
  17: 208.7727500028001,
  18: 21.742056352860352,
  19: np.nan,
  20: 16.6038963173654,
  21: 25.28952001920013,
  22: 54.7820864335212,
  23: 36.75802000560001,
  24: 18.71872129,
  25: 30.1634600016,
  26: 95.37075040035738,
  27: 138.3680400120001,
  28: 17.73233819348684,
  29: np.nan,
  30: 14.80302342121337,
  31: 251.83851001200003,
  32: 55.926190956481534,
  33: 72.4443400032,
  34: 12.69221484,
  35: 26.032340003999998,
  36: 58.56261169338368,
  37: 153.36183000480003,
  38: 7.622005931348156,
  39: np.nan,
  40: 72.24367956241771,
  41: 14.83083001279999,
  42: 29.5726042895728,
  43: 38.723000005199985,
  44: 6.083562133,
  45: 12.845630001599998,
  46: 26.66998281055652,
  47: 63.26220000600001,
  48: 9.917530329288393,
  49: np.nan,
  50: 8.555606693927,
  51: 23.802009994800002,
  52: 7.113126469779095,
  53: 7.206999998399999,
  54: 0.910216433,
  55: 1.4089999991999997,
  56: 0.5322637911479053,
  57: 15.186009997200001,
  58: 0.0,
  59: np.nan,
  60: 9.716385738295367,
  61: 14.7327399948,
  62: 6.671946105284065,
  63: 5.691999996,
  64: 0.956574175,
  65: 1.0203399996,
  66: 1.6040220980113027,
  67: 8.020399999199999,
  68: 0.4838433599999999,
  69: np.nan,
  70: 37.88758167841983,
  71: 59.03332998119994,
  72: 22.874363860953647,
  73: 13.690399997999998,
  74: 6.194107518,
  75: 6.4613199911999954,
  76: 7.367580219466185,
  77: 38.881609991999944,
  78: 1.4515300799999995,
  79: np.nan},
 '2021-07-12': {0: 607.2971827405001,
  1: 88.9671100664001,
  2: 21.26749278974862,
  3: 17.1524199804,
  4: 64.471138092,
  5: 89.84481002279999,
  6: 26.2044999999998,
  7: 51.9698800632001,
  8: 5.354051858751745,
  9: 0.0,
  10: 177.42361595891452,
  11: 287.5395700032,
  12: 52.117,
  13: 47.388199995600004,
  14: 20.94835038,
  15: 41.4250800048,
  16: 83.57340667555117,
  17: 198.72629000280003,
  18: 20.784858903363354,
  19: np.nan,
  20: 178.323907459086,
  21: 185.83897002839998,
  22: 52.37029646474982,
  23: 27.87144997800001,
  24: 17.88339044,
  25: 23.645340010799984,
  26: 91.11855133792106,
  27: 134.3221800396,
  28: 16.95166921641509,
  29: np.nan,
  30: 128.82813286243115,
  31: 192.6867300156,
  32: 53.46403160619618,
  33: 41.412320006399995,
  34: 12.1261155,
  35: 11.840830002000002,
  36: 55.95153983444301,
  37: 139.43358000720002,
  38: 7.286445921791947,
  39: np.nan,
  40: 69.04410667683521,
  41: 93.877410018,
  42: 28.270665735943805,
  43: 27.512680004399986,
  44: 5.811656147,
  45: 5.2319800032,
  46: 25.480875296710053,
  47: 61.132750010400024,
  48: 9.480909497181356,
  49: np.nan,
  50: 8.178601399067174,
  51: 17.6743199976,
  52: 6.7999699585309585,
  53: 6.131999998799999,
  54: 0.870099156,
  55: 0.6185600004,
  56: 0.5085322845362154,
  57: 10.923759998400003,
  58: 0.0,
  59: np.nan,
  60: 9.287042311133577,
  61: 19.966500000000007,
  62: 6.378212628950804,
  63: 6.524999997600001,
  64: 0.913782413,
  65: 1.9303400016,
  66: 1.5325051891827732,
  67: 11.511160000800006,
  68: 0.4625420799999998,
  69: np.nan,
  70: 36.21177607303267,
  71: 51.3836100036,
  72: 21.86731639537707,
  73: 10.310769999600003,
  74: 5.917744056,
  75: 5.152679999999999,
  76: 7.039089381655591,
  77: 35.920160003999996,
  78: 1.3876262399999995,
  79: np.nan}}).set_index(['group','category','type'])

我正在尝试通过 df2 合并 df1 上的 group, category, date (date is a column),以便我的输出如下所示:

在我想要的输出示例中,我省略了 df2sales_1 & sales_2 中的值,但这些行应填充 group 中相应的 categorydf2 值}}。

                    2021-06-28                                                               2021-07-05                                                      2021-07-12
                    total_orders    total_sales         sales_1        sales_2                  total_orders    total_sales         sales_1        sales_2            total_orders    total_sales         sales_1        sales_2
group    category
A        Apple        88.000          4,603.210
         Amazon       66.000          2,485.060
         Google      191.000          4,919.400
         Netflix     558.000          6,097.770
         Facebook     12.000             31.220
         Uber          4.000            155.710
         Tesla        56.000          3,484.990
         total       975.000         17,237.360
B        Apple        90.000            561.540
         Amazon       26.000            698.750
         Google       49.000          1,290.130
         Netflix     250.000          4,292.680
         Facebook      7.000            947.650
         Uber          2.000            329.000
         Tesla        44.000          2,889.650
         total       468.000          9,989.400
C        Apple        36.000            330.890
         Amazon       52.000          2,076.260
         Google       94.000          2,982.270
         Netflix     750.000         11,978.620
         Facebook     10.000            683.000
         Uber          0.000              0.000
         Tesla        52.000          3,812.170
         total       994.000         20,963.210
D        Apple        16.000            234.490
         Amazon       22.000            896.100
         Google        5.000            231.000
         Netflix      57.000            893.810
         Uber          3.000            129.000
         Tesla        33.000          1,712.330
         total       136.000          4,106.730

这样 sales_1 & sales_2 就合并在 group & category 上并在同一个 date 列上。 total_x 中的 df2 可以忽略,因为它可以从字段中计算出来。

total_values 不用于合并,只用于它之后的那些。

Enter image description here

我尝试过的:

df1.reset_index().merge(df2.reset_index(), left_on=['group', 'category'], right_on=['group', 'category'])

抛出警告:

<块引用>

用户警告:不同级别之间的合并可能会产生意想不到的结果(左侧 2 个级别,右侧 1 个)

这不是我期望它合并的方式。我怎样才能达到我想要的输出?

使用

df = df1.merge(df2.unstack(), left_index=True, right_index=True)

产生:

Enter image description here

然后只是重新排序列,因为我想要一个唯一的日期和 4 列吗?或者可能是某个日期有00:00:00

1 个答案:

答案 0 :(得分:4)

首先在DatetimeIndex的列中创建df2,然后unstack并通过MultiIndexes合并:

f = lambda x: pd.to_datetime(x)
df = (df1.merge(df2.rename(columns=f).unstack(), left_index=True, right_index=True)
        .sort_index(axis=1))

print (df.head())

               2021-06-28                                  2021-07-05  \
                  Sales_1 Sales_2 total_orders total_sales    Sales_1   
group category                                                          
A     Apple     52.172000  61.166         88.0     4603.21  54.517000   
      Amazon    52.425509  25.662         66.0     2485.06  54.782086   
      Google    53.520397  58.812        191.0     4919.40  55.926191   
      Netflix    6.807139  12.088        558.0     6097.77   7.113126   
      Facebook   2.300471  30.713         12.0       31.22  29.572604   

                                                  2021-07-12            \
                 Sales_2 total_orders total_sales    Sales_1   Sales_2   
group category                                                           
A     Apple     66.55266        109.0     3453.02  52.117000  47.38820   
      Amazon    36.75802         48.0    17868.73  52.370296  27.87145   
      Google    72.44434        174.0    44707.83  53.464032  41.41232   
      Netflix    7.20700        592.0    61425.99   6.799970   6.13200   
      Facebook  38.72300         13.0     1261.00  28.270666  27.51268   

                                         
               total_orders total_sales  
group category                           
A     Apple            32.0     2147.70  
      Amazon           15.0     4767.30  
      Google           89.0     2399.30  
      Netflix         239.0     3137.44  
      Facebook          2.0      178.00  
相关问题