这是我的表格数据框:
col1 col2 col3 col4 col5 col6 col7
1 1 1 1 137 500 11
1 1 1 1 120 500 11
1 1 2 1 101 500 11
1 1 3 1 55 500 11
1 2 2 1 133 340 12
1 2 2 1 125 340 12
1 2 1 1 63 340 12
我需要用 col6 的值减去 col5 截至当前行(含当前行)的累积总和,并把结果存入一个单独的新列(下例中的 col8)。累积总和应按 Col7 的值分组计算。举个例子,col8 的值将是:
col8
(500-137)
(500-137-120)
(500-137-120-101)
(500-137-120-101-55)
(340-133)
(340-133-125)
(340-133-125-63)
你能建议一些解决方案吗? 我必须使用python pandas。
答案 0 :(得分:1)
我认为您直接使用 cumsum 可能无效,因为您没有按 col7 分组——从您的示例计算中可以明显看出,您只在 col7 的每个值内计算累积总和,所以我想你想要:
df['col8'] = df['col6'] - df.groupby('col7')['col5'].cumsum()
答案 1 :(得分:0)
试试这个。计算累计总和(running total)有多种不同的方式。
-- Running difference per col6 group:
--   Col8 = col6 - (sum of col5 over this row and all lower-numbered rows of the group).
-- Implemented with a correlated subquery over a row-numbered CTE.
;WITH cte
     AS (SELECT *,
                -- Number the rows inside each col6 group. ORDER BY col6 is constant
                -- within its own partition, so the numbering order is arbitrary;
                -- order by a real sequence column if the table has one.
                Row_number() OVER (PARTITION BY col6 ORDER BY col6) AS rn
         FROM   Yourtable)
SELECT a.*,
       -- The alias belongs on this computed column, not on the CTE definition.
       a.col6 - (SELECT Sum(b.col5)
                 FROM   cte b
                 WHERE  b.col6 = a.col6
                        AND b.rn <= a.rn) AS Col8
FROM   cte a;
如果您使用 SQL Server 2012 或更高版本,请尝试以下查询。
-- Running difference per col6 group using a windowed SUM:
--   Col8 = col6 - (sum of col5 from the first row of the group up to this row).
WITH cte
     AS (SELECT *,
                -- Row position inside each col6 group. ORDER BY col6 is constant
                -- within the partition, so this order is arbitrary; substitute a
                -- real sequence column if the table has one.
                Row_number() OVER (PARTITION BY col6 ORDER BY col6) AS rn
         FROM   Yourtable)
SELECT *,
       -- Accumulate in rn order so the running sum follows the row numbering
       -- shown in the output; ROWS (not the default RANGE) keeps tied rows
       -- from being summed together.
       col6 - Sum(col5)
                OVER (PARTITION BY col6 ORDER BY rn ROWS UNBOUNDED PRECEDING) AS Col8
FROM   cte a;
答案 2 :(得分:-1)
用于SQL Server
样本表
-- Sample data: seven rows in two col6 groups (500 and 340).
CREATE TABLE #TEMP
(
    col1 INT,
    col2 INT,
    col3 INT,
    col4 INT,
    col5 INT,
    col6 INT,
    col7 INT,
    col8 INT NULL  -- target of the later UPDATE query; left NULL by the insert
);

-- Explicit column list so the insert does not depend on the table's column order/count.
INSERT INTO #TEMP (col1, col2, col3, col4, col5, col6, col7)
VALUES
    (1, 1, 1, 1, 137, 500, 11),
    (1, 1, 1, 1, 120, 500, 11),
    (1, 1, 2, 1, 101, 500, 11),
    (1, 1, 3, 1,  55, 500, 11),
    (1, 2, 2, 1, 133, 340, 12),
    (1, 2, 2, 1, 125, 340, 12),
    (1, 2, 1, 1,  63, 340, 12);
**QUERY**
-- Builds, for every row, the subtraction chain as text in COL8,
-- e.g. '(500-137-120)' for the second row of the 500 group.
;WITH numbered AS
(
    -- Position of each row inside its COL6 group.
    -- ORDER BY (SELECT 0) imposes no real ordering on the numbering.
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY COL6 ORDER BY (SELECT 0)) AS rNO
    FROM #TEMP
)
SELECT
    cur.col1,
    cur.col2,
    cur.col3,
    cur.col4,
    cur.col5,
    cur.col6,
    cur.col7,
    '(' + CAST(cur.COL6 AS VARCHAR(200)) + '-'
    + SUBSTRING(
          (
              -- Each row contributes ', <col6>-<col5>'; the REPLACE strips the
              -- ', <col6>' prefix so only '-<col5>' remains, and FOR XML PATH('')
              -- concatenates those pieces in rNO order.
              SELECT REPLACE(
                         ', ' + CAST(prev.COL6 AS VARCHAR(200)) + '-' + CAST(prev.COL5 AS VARCHAR(200)),
                         ', ' + CAST(prev.COL6 AS VARCHAR(200)),
                         ''
                     )
              FROM numbered AS prev
              WHERE prev.COL6 = cur.COL6
                AND prev.rNO <= cur.rNO
              ORDER BY prev.rNO
              FOR XML PATH('')
          ),
          2,        -- skip the leading '-' of the concatenated list
          200000
      )
    + ')' AS COL8
FROM numbered AS cur;
编辑:
如果您想计算累计结果并把它作为 COL8 输出,可以尝试以下查询
-- Returns every row with COL8 = col6 minus the running sum of col5 inside the
-- row's COL6 group (e.g. 500-137, then 500-137-120, ...).
;WITH numbered AS
(
    -- Position of each row inside its COL6 group.
    -- ORDER BY (SELECT 0) imposes no real ordering on the numbering.
    SELECT
        ROW_NUMBER() OVER (PARTITION BY COL6 ORDER BY (SELECT 0)) AS rNO,
        *
    FROM #TEMP
)
SELECT
    cur.col1,
    cur.col2,
    cur.col3,
    cur.col4,
    cur.col5,
    cur.col6,
    cur.col7,
    -- Subtract (not add) the accumulated col5 from col6. The subquery always
    -- matches at least the current row itself.
    cur.col6 - (
        SELECT SUM(prev.COL5)
        FROM numbered AS prev
        WHERE prev.COL6 = cur.COL6
          AND prev.rNO <= cur.rNO
    ) AS COL8
FROM numbered AS cur;
编辑2:补充了用于更新表的 UPDATE 查询
-- Stores the running difference in #TEMP.COL8:
--   COL8 = col6 - (sum of col5 over this row and all lower-numbered rows of
--   the same COL6 group).
-- Prerequisite: #TEMP must already have a COL8 column.
;WITH numbered AS
(
    -- Position of each row inside its COL6 group.
    -- ORDER BY (SELECT 0) imposes no real ordering on the numbering.
    SELECT
        ROW_NUMBER() OVER (PARTITION BY COL6 ORDER BY (SELECT 0)) AS rNO,
        COL5,
        COL6,
        COL8
    FROM #TEMP
)
-- Update through the CTE so each physical row receives its own value.
-- Joining back to #TEMP on (COL5, COL6) is not safe: that pair is not a key,
-- so duplicate pairs would match several computed values.
-- No WHERE clause on purpose: every row of #TEMP gets a COL8.
UPDATE cur
SET COL8 = cur.COL6 - (
        SELECT SUM(prev.COL5)
        FROM numbered AS prev
        WHERE prev.COL6 = cur.COL6
          AND prev.rNO <= cur.rNO
    )
FROM numbered AS cur;