Python Pandas中的累积总和

时间:2015-02-02 04:21:14

标签: python pandas

这是我的表格数据框:

col1 col2 col3 col4 col5 col6 col7 
1      1    1    1   137  500  11
1      1    1    1   120  500  11
1      1    2    1   101  500  11
1      1    3    1   55   500  11
1      2    2    1   133  340  12
1      2    2    1   125  340  12
1      2    1    1   63   340  12

我需要用col6的值与col5到当前行为止的累积总和之间的差值来更新数据框,并将该值存储在名为“更新”的单独列中(下例中记为col8)。累积总和应在Col7的每个取值内分别计算。举个例子,col8的值将是:

col8
(500-137)
(500-137-120)
(500-137-120-101)
(500-137-120-101-55)
(340-133)
(340-133-125)
(340-133-125-63)

你能建议一些解决方案吗? 我必须使用python pandas。

3 个答案:

答案 0 :(得分:1)

我认为您使用cumsum的尝试之所以无效,可能是因为您没有按col7分组——从您的示例计算可以明显看出,您只是在col7的每个值内计算累积总和,所以我想你想要:

df['col8'] = df['col6'] - df.groupby('col7')['col5'].cumsum()

答案 1 :(得分:0)

试试这个。计算累计总和(running total)有多种方法。

-- Remaining amount per row: col6 minus the running sum of col5 over the rows
-- that share the same col6 value, up to and including the current row.
-- Works on SQL Server versions without windowed SUM ... ORDER BY support.
-- NOTE(review): rn is numbered with ORDER BY col6 inside PARTITION BY col6, so
-- all rows of a partition tie and the numbering order is not guaranteed;
-- order by a real sequencing column if Yourtable has one.
;WITH cte
     AS (SELECT *,
                Row_number() OVER (PARTITION BY col6 ORDER BY col6) AS rn
         FROM   Yourtable)
SELECT *,
       -- Correlated subquery: total col5 of the rows in the same col6 group
       -- whose rn is at or before the current row's rn.
       col6 - (SELECT Sum(b.col5)
               FROM   cte b
               WHERE  a.col6 = b.col6
                      AND b.rn <= a.rn) AS Col8
FROM   cte a;

如果您使用Sql Server 2012+,请尝试此操作。

-- SQL Server 2012+ variant: col6 minus a windowed running sum of col5 within
-- each col6 group.
-- The window is ordered by rn (not col6, which is constant inside its own
-- partition) so the running sum follows the same row numbering that the
-- query returns in the rn column.
-- NOTE(review): rn itself is assigned with ORDER BY col6 over tied rows, so
-- the numbering order is not guaranteed; use a real sequencing column if
-- Yourtable has one.
;WITH cte
     AS (SELECT *,
                Row_number() OVER (PARTITION BY col6 ORDER BY col6) AS rn
         FROM   Yourtable)
SELECT *,
       col6 - Sum(col5)
                OVER (PARTITION BY col6
                      ORDER BY rn
                      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Col8
FROM   cte a;

SqlFiddle Demo

答案 2 :(得分:-1)

用于SQL Server

样本表

-- Sample data: a session-local temp table holding the seven rows from the
-- question (col1 .. col7, all integers).
CREATE TABLE #TEMP
(
    col1 INT,
    col2 INT,
    col3 INT,
    col4 INT,
    col5 INT,
    col6 INT,
    col7 INT
);

-- Load the rows in the same order as they appear in the question.
INSERT INTO #TEMP (col1, col2, col3, col4, col5, col6, col7)
VALUES
    (1, 1, 1, 1, 137, 500, 11),
    (1, 1, 1, 1, 120, 500, 11),
    (1, 1, 2, 1, 101, 500, 11),
    (1, 1, 3, 1, 55,  500, 11),
    (1, 2, 2, 1, 133, 340, 12),
    (1, 2, 2, 1, 125, 340, 12),
    (1, 2, 1, 1, 63,  340, 12);

查询(QUERY)

-- For every row, builds a display string of the form
-- '(<col6>-<col5 of 1st row>-...-<col5 of current row>)' within its COL6 group.
;WITH numbered AS
(
    -- Number the rows inside each COL6 group. ORDER BY (SELECT 0) imposes no
    -- particular ordering, so the numbering follows whatever order the
    -- engine happens to return.
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY COL6 ORDER BY (SELECT 0)) AS rNO
    FROM #TEMP
)
SELECT cur.col1,
       cur.col2,
       cur.col3,
       cur.col4,
       cur.col5,
       cur.col6,
       cur.col7,
       -- Prefix with the group's COL6, then append the '-'-separated COL5
       -- values of all rows up to and including the current one.
       '(' + CAST(cur.COL6 AS VARCHAR(200)) + '-'
       + SUBSTRING(
             -- Each inner row yields '-<col5>': the ', <col6>' prefix is
             -- built and immediately stripped again by REPLACE. FOR XML
             -- PATH('') concatenates the pieces; SUBSTRING(..., 2, ...)
             -- drops the leading '-'.
             (SELECT REPLACE(', ' + CAST(prev.COL6 AS VARCHAR(200)) + '-' + CAST(prev.COL5 AS VARCHAR(200)),
                             ', ' + CAST(prev.COL6 AS VARCHAR(200)),
                             '')
              FROM numbered AS prev
              WHERE prev.rNO <= cur.rNO
                AND prev.COL6 = cur.COL6
              ORDER BY prev.rNO
              FOR XML PATH('')),
             2, 200000)
       + ')' AS COL8
FROM numbered AS cur

编辑:

如果您想计算累计结果(running total)并将其存储在COL8中,可以尝试以下查询:

-- Computes COL8 as a number: COL6 minus the running sum of COL5 within each
-- COL6 group, e.g. 500-137, 500-137-120, ... as asked in the question.
;WITH CTE AS
(
   -- Retrieve row number for each type of COL6 in default order
   -- (ORDER BY (SELECT 0) imposes no particular ordering).
   SELECT ROW_NUMBER() OVER(PARTITION BY COL6 ORDER BY (SELECT 0))rNO,*
   FROM #TEMP
)
SELECT col1 , col2, col3 , col4 , col5 , col6 , col7,
(
     -- Subtract (not add) the accumulated COL5 of the rows up to and
     -- including the current one from the group's COL6.
     SELECT col6 - SUM(COL5)
     FROM CTE   
     WHERE RNO<=C2.RNO AND COL6=C2.COL6
     GROUP BY col6
)COL8
FROM CTE C2

编辑2:用于更新表的UPDATE查询

-- Writes COL6 minus the running sum of COL5 (per COL6 group) into #TEMP.COL8.
-- Prerequisite: #TEMP must already have a COL8 column; the sample CREATE TABLE
-- only defines col1..col7, so add it first in a separate batch, e.g.
--   ALTER TABLE #TEMP ADD COL8 INT;
-- NOTE(review): rows are matched back to #TEMP on (COL5, COL6) only. If two
-- rows of the same COL6 group share a COL5 value, several TAB rows match one
-- target row and which value is written is not defined - confirm the data has
-- no such duplicates or join on a real key.
;WITH CTE AS
(
   -- Retrieve row number for each type of COL6 in default order
   -- (ORDER BY (SELECT 0) imposes no particular ordering).
   SELECT ROW_NUMBER() OVER(PARTITION BY COL6 ORDER BY (SELECT 0))rNO,*
   FROM #TEMP
)
UPDATE #TEMP SET COL8 = TAB.COL8 
FROM
(
    SELECT col1 , col2, col3 , col4 , col5 , col6 , col7,
    (
       -- Subtract (not add) the accumulated COL5 of the rows up to and
       -- including the current one from the group's COL6.
       SELECT col6 - SUM(COL5)
       FROM CTE   
       WHERE RNO<=C2.RNO AND COL6=C2.COL6
       GROUP BY col6
    )COL8
    FROM CTE C2
)TAB
WHERE TAB.COL5=#TEMP.COL5 AND TAB.COL6=#TEMP.COL6