按列分组并合并字符串

时间:2018-12-13 15:05:29

标签: python group-by aggregate

为简化我的问题,我创建了一个小的DataFrame,如下所示:

Type  From  To
A    "H1"  "U1"
A    "H9"  "I8"
A    "H1"  "IL"
B    "P2"  "P8"
B    "P2"  "P7"
C    "P9"  "O8"
C    "P9"  "I0"
C    "P7"  "O8"

在分组并将字符串拼接之后,应当得到如下期望的结果:

Type  From  To 
A    "H1"  "U1, IL"
A    "H9"  "I8"
B    "P2"  "P8, P7"
C    "P9"  "O8, I0"
C    "P7"  "O8"

我在 R 中通过在 aggregate 里使用 split 函数做到了这一点。如果您有任何想法或建议,我将非常感谢!

4 个答案:

答案 0 :(得分:5)

使用 pandas:

extension UIView {

    /// Adds solid-colored border views along the given edges of this view.
    ///
    /// Every requested edge gets its own subview, pinned to that edge with
    /// Auto Layout and `width` points thick. Nothing is removed beforehand,
    /// so calling this method again adds further border subviews.
    ///
    /// - Parameters:
    ///   - edges: Edges that receive a border. Defaults to `.all`.
    ///   - color: Background color of each border view. Defaults to `.black`.
    ///   - width: Thickness of each border in points. Defaults to `1.0`.
    func addBorders(edges: UIRectEdge = .all, color: UIColor = .black, width: CGFloat = 1.0) {

        // Builds one border view, already configured for Auto Layout and
        // inserted into this view's hierarchy.
        func makeBorder() -> UIView {
            let border = UIView(frame: .zero)
            border.translatesAutoresizingMaskIntoConstraints = false
            border.backgroundColor = color
            addSubview(border)
            return border
        }

        var constraints: [NSLayoutConstraint] = []

        // `contains(.top)` is already true when `edges` is `.all`, so no
        // separate `.all` check is needed for any edge.
        if edges.contains(.top) {
            let border = makeBorder()
            constraints += [
                border.topAnchor.constraint(equalTo: topAnchor),
                border.leadingAnchor.constraint(equalTo: leadingAnchor),
                border.trailingAnchor.constraint(equalTo: trailingAnchor),
                border.heightAnchor.constraint(equalToConstant: width)
            ]
        }
        if edges.contains(.left) {
            let border = makeBorder()
            constraints += [
                border.topAnchor.constraint(equalTo: topAnchor),
                border.bottomAnchor.constraint(equalTo: bottomAnchor),
                // `UIRectEdge.left` is a physical edge: pin to `leftAnchor`,
                // not `leadingAnchor`, which flips in right-to-left layouts.
                border.leftAnchor.constraint(equalTo: leftAnchor),
                border.widthAnchor.constraint(equalToConstant: width)
            ]
        }
        if edges.contains(.right) {
            let border = makeBorder()
            constraints += [
                border.topAnchor.constraint(equalTo: topAnchor),
                border.bottomAnchor.constraint(equalTo: bottomAnchor),
                // Physical right edge, for the same reason as above.
                border.rightAnchor.constraint(equalTo: rightAnchor),
                border.widthAnchor.constraint(equalToConstant: width)
            ]
        }
        if edges.contains(.bottom) {
            let border = makeBorder()
            constraints += [
                border.bottomAnchor.constraint(equalTo: bottomAnchor),
                border.leadingAnchor.constraint(equalTo: leadingAnchor),
                border.trailingAnchor.constraint(equalTo: trailingAnchor),
                border.heightAnchor.constraint(equalToConstant: width)
            ]
        }

        NSLayoutConstraint.activate(constraints)
    }
}

答案 1 :(得分:2)

在 R 中,我们可以先分组,再把每组的字符串拼接起来。(请注意,该问题最初发布时带有 R 标签,否则我们不会给出这个 R 解决方案。)

library(tidyverse)
# Group by Type and From, then collapse each group's To values into one
# comma-separated string.
summarise(
    group_by(df1, Type, From),
    To = paste(To, collapse = ", ")
)
# A tibble: 5 x 3
# Groups:   Type [?]
#  Type  From  To    
#  <chr> <chr> <chr> 
#1 A     H1    U1, IL
#2 A     H9    I8    
#3 B     P2    P8, P7
#4 C     P7    O8    
#5 C     P9    O8, I0

数据

# Sample data: eight rows of character columns Type, From and To.
df1 <- data.frame(
    Type = c("A", "A", "A", "B", "B", "C", "C", "C"),
    From = c("H1", "H9", "H1", "P2", "P2", "P9", "P9", "P7"),
    To = c("U1", "I8", "IL", "P8", "P7", "O8", "I0", "O8"),
    stringsAsFactors = FALSE
)

在 Python 中,可以这样做:

# Join the 'To' strings of every (Type, From) group with a comma, then turn
# the group keys back into ordinary columns.
grouped_to = df2.groupby(['Type', 'From'])['To']
out = grouped_to.agg(','.join).reset_index()
print(out)
# Type From     To
#0    A   H1  U1,IL
#1    A   H9     I8
#2    B   P2  P8,P7
#3    C   P7     O8
#4    C   P9  O8,I0

数据

import pandas as pd
# Sample data, one (Type, From, To) tuple per row.
df2 = pd.DataFrame(
    [
        ('A', 'H1', 'U1'),
        ('A', 'H9', 'I8'),
        ('A', 'H1', 'IL'),
        ('B', 'P2', 'P8'),
        ('B', 'P2', 'P7'),
        ('C', 'P9', 'O8'),
        ('C', 'P9', 'I0'),
        ('C', 'P7', 'O8'),
    ],
    columns=['Type', 'From', 'To'],
)

答案 2 :(得分:2)

这算是一种取巧的做法(hack)。

# Group on both key columns: grouping on 'From' alone would merge rows that
# share a 'From' value but belong to different 'Type' values.
df.groupby(['Type', 'From'], as_index=False).agg({'To': ', '.join})

答案 3 :(得分:0)

使用 datar 也很容易实现:

>>> from pipda import register_func
>>> from datar.all import f, tribble, group_by, summarise
>>> 
>>> df = tribble(
...     f.Type,  f.From,  f.To,
...     "A",     "H1",    "U1",
...     "A",     "H9",    "I8",
...     "A",     "H1",    "IL",
...     "B",     "P2",    "P8",
...     "B",     "P2",    "P7",
...     "C",     "P9",    "O8",
...     "C",     "P9",    "I0",
...     "C",     "P7",    "O8",
... )
>>> 
>>> @register_func(None)
... def paste(x, sep=', '):
...     return sep.join(x)
... 
>>> df >> group_by(f.Type, f.From) >> summarise(To=paste(f.To))
[2021-06-11 18:13:18][datar][   INFO] `summarise()` has grouped output by ['Type'] (override with `_groups` argument)
      Type     From       To
  <object> <object> <object>
0        A       H1   U1, IL
1        A       H9       I8
2        B       P2   P8, P7
3        C       P7       O8
4        C       P9   O8, I0
[Groups: ['Type'] (n=3)]
>>> df >> group_by(f.Type, f.From) >> summarise(To=paste(f.To, sep=';'))
[2021-06-11 18:13:52][datar][   INFO] `summarise()` has grouped output by ['Type'] (override with `_groups` argument)
      Type     From       To
  <object> <object> <object>
0        A       H1    U1;IL
1        A       H9       I8
2        B       P2    P8;P7
3        C       P7       O8
4        C       P9    O8;I0
[Groups: ['Type'] (n=3)]

我是该包的作者。如果您有任何疑问,欢迎随时提交 issue。