如何在存在双向(多对多)关系的情况下,按唯一accountID对列值求和且不重复计算(见示例)

时间:2013-10-12 15:35:05

标签: sql r excel

假设我有一个小额贷款机构的数据集,我想知道它的贷款被抵押担保品覆盖的程度如何。问题在于,可能有多项担保品为同一笔贷款作抵押,也可能一项担保品同时为多笔贷款作抵押:

cAID cID cLoan sID sAmount
1   10  50   200 100    1100
2   11  50  1000 100    1100
3   12  60  1000 110     900
4   12  60  1000 120     300
5   14  80  1000 140    1200
6   15  90  1000 150     900

分配给一笔贷款的担保金额不应超过该贷款的金额;如果担保品金额高于贷款金额,并且该担保品同时还为其他贷款作担保,则应把剩余部分分配给那些贷款。

理想情况下,我希望它最终会像这样:

cAIDa cIDa cLoanA sIDa sAmountA asignedSAmountA CoverageRatio
1    10   50    200  100     1100             200           1.0
2    11   50   1000  100     1100             900           0.9
3    12   60   1000  110      900            1000           1.0
4    12   60   1000  120      300            1000           1.0
5    14   80   1000  140     1200            1000           1.0
6    15   90   1000  150      900             900           0.9

如何在R或Excel中生成这两个额外的列?基本上,我最终需要按唯一的客户账户ID得到已分配的担保总额,并且不能超额担保。例如:第一笔贷款(cLoanA)为200,担保品金额(sAmountA)为1100,分配的担保金额(asignedSAmountA)最多等于贷款金额,因此为200。同一担保品(注意sIDa相同)还用于下一笔贷款:1100减去第一笔贷款已占用的200,剩余900;而第二行的cLoanA为1000,所以剩余的900全部分配给它,覆盖率为0.9,即该贷款有一部分没有担保。asignedSAmountA和CoverageRatio两列中前两行的数字就是这样得出的。

我在R中的代码:

# Example data: one row per (loan account, pledged security) pair.
# An account ID repeats when several securities back the same loan, and a
# security ID repeats when one security backs several loans.

# example client account ID (identifies the loan)
cAIDa <- c(10, 11, 12, 12, 14, 15)
# example client ID
cIDa <- c(50, 50, 60, 60, 80, 90)
# example loan amount
cLoanA <- c(200, 1000, 1000, 1000, 1000, 1000)
# example pledged security ID
sIDa <- c(100, 100, 110, 120, 140, 150)
# example pledged security amount
sAmountA <- c(1100, 1100, 900, 300, 1200, 900)

# generate initial data.frame
portfolioOne <- data.frame(cAIDa, cIDa, cLoanA, sIDa, sAmountA)
portfolioOne

# Hand-entered target values: security assigned per unique account ID.
# The assigned amount must never exceed the loan amount, so account 12
# (loan 1000, securities 900 + 300) is capped at 1000 rather than 1200.
asignedSAmountA <- c(200, 900, 1000, 1000, 1000, 900)
portfolioTwo <- data.frame(cAIDa, cIDa, cLoanA, sIDa, sAmountA, asignedSAmountA)

# Coverage ratio = assigned security / loan amount (1.0 means fully covered).
ratio1 <- asignedSAmountA / portfolioOne$cLoanA
portfolioTwo$CoverageRatio <- ratio1
portfolioTwo

2 个答案:

答案 0 :(得分:4)

好的,现在我明白了你想要的东西。

以下是我从原始数据中获得的结果:

    cAIDa  cIDa cLoanA  sIDa sAmountA asignedSAmountA  CoverageRatio
    10      50    200   100    1100         200             1.0
    11      50   1000   100    1100         900             0.9
    12      60   1000   110     900        1000             1.0
    12      60   1000   120     300        1000             1.0
    14      80   1000   140    1200        1000             1.0
    15      90   1000   150     900         900             0.9

这是我用来得到上述结果的VBA宏:

Option Explicit
Sub AllocateSecurity()
    ' Allocates pledged security amounts to loans so that no loan is assigned
    ' more security than its loan amount, then writes a result table.
    '
    ' Input : columns A:E of the sheet that unqualified Range/Cells resolve to
    '         (the active sheet when run from a standard module). Row 1 is a
    '         header; each following row holds
    '         col 1 = loan account key, col 3 = loan amount,
    '         col 4 = security key,     col 5 = security amount.
    ' Output: a table starting at J1 with the five source columns plus
    '         "asignedSAmountA" and "CoverageRatio". The target columns are
    '         cleared before writing.
    '
    ' Rows are processed top to bottom, so earlier rows get first call on a
    ' shared security. A row whose loan amount is 0 gets an assigned amount of
    ' 0 and an empty coverage ratio (the ratio is undefined for a zero loan).
    Dim colSec As Collection
    Dim dSec() As Double          ' (n, 0) = security key, (n, 1) = remaining security amount
    Dim dSecIndex() As Double     ' security keys, 1-D, for WorksheetFunction.Match
    Dim colLoan As Collection
    Dim dLoan() As Double         ' (n, 0) = account key, (n, 1) = still-uncovered loan amount
    Dim dLoanIndex() As Double    ' account keys, 1-D, for WorksheetFunction.Match
    Dim rSrc As Range, vSrc As Variant
    Dim vRes() As Variant
    Dim i As Long, j As Long, v As Variant
    Dim loanPos As Long, secPos As Long
    Dim rDest As Range

    Set rDest = Range("J1")

    'Get Original Data
    Set rSrc = Range("A1", Cells(Rows.Count, "A").End(xlUp)) _
        .Resize(columnsize:=5)
    vSrc = rSrc

    'Nothing to allocate when there is no data row below the header;
    'without this guard the ReDim statements below fail on "1 To 0".
    If UBound(vSrc) < 2 Then Exit Sub

    'Securities: collect the distinct keys. Adding a duplicate key to a
    'Collection raises an error, which is deliberately ignored here.
    Set colSec = New Collection
    On Error Resume Next
    For i = 2 To UBound(vSrc)
        colSec.Add Item:=vSrc(i, 4), Key:=CStr(vSrc(i, 4))
    Next i
    On Error GoTo 0
    ReDim dSec(1 To colSec.Count, 0 To 1)
    ReDim dSecIndex(1 To colSec.Count)
    For i = 1 To colSec.Count
        dSec(i, 0) = colSec(i)
        dSecIndex(i) = colSec(i)
        'security amount taken from the first row that carries this key
        dSec(i, 1) = WorksheetFunction.VLookup(colSec(i), _
            Range(rSrc.Columns(4), rSrc.Columns(5)), 2, False)
    Next i

    'Loans: same de-duplication, keyed on the loan account
    Set colLoan = New Collection
    On Error Resume Next
    For i = 2 To UBound(vSrc)
        colLoan.Add Item:=vSrc(i, 1), Key:=CStr(vSrc(i, 1))
    Next i
    On Error GoTo 0
    ReDim dLoan(1 To colLoan.Count, 0 To 1)
    ReDim dLoanIndex(1 To colLoan.Count)
    For i = 1 To colLoan.Count
        dLoan(i, 0) = colLoan(i)
        dLoanIndex(i) = colLoan(i)
        'starts as the full loan amount; reduced as security is allocated
        dLoan(i, 1) = WorksheetFunction.VLookup(colLoan(i), _
            rSrc, 3, False)
    Next i

    'Set up Results Array: source columns plus two computed columns
    ReDim vRes(1 To UBound(vSrc), 1 To UBound(vSrc, 2) + 2)

    'Headers
    v = Array("cAIDa", "cIDa", "cLoanA", "sIDa", "sAmountA", "asignedSAmountA", "CoverageRatio")
    For i = 0 To UBound(v)
        vRes(1, i + 1) = v(i)
    Next i

    'Pass 1: row by row, give each loan as much of its security as is still
    'available, and reduce both running balances accordingly.
    With WorksheetFunction
        For i = 2 To UBound(vSrc)
            For j = 1 To UBound(vSrc, 2)
                vRes(i, j) = vSrc(i, j)
            Next j

            'look each position up once per row instead of on every use
            loanPos = .Match(vSrc(i, 1), dLoanIndex, 0)
            secPos = .Match(vSrc(i, 4), dSecIndex, 0)

            vRes(i, 6) = .Min(dSec(secPos, 1), dLoan(loanPos, 1))
            dLoan(loanPos, 1) = dLoan(loanPos, 1) - vRes(i, 6)
            dSec(secPos, 1) = dSec(secPos, 1) - vRes(i, 6)
        Next i
    End With

    'Pass 2: coverage ratio = 1 - (uncovered amt / orig amt), using the
    'account's final uncovered balance. The assigned amount is then restated
    'as loan * ratio, so every row of an account shows the account total.
    With WorksheetFunction
        For i = 2 To UBound(vRes)
            If vRes(i, 3) = 0 Then
                'zero loan: ratio is undefined, avoid division by zero
                vRes(i, 6) = 0
            Else
                loanPos = .Match(vRes(i, 1), dLoanIndex, 0)
                vRes(i, 7) = 1 - (dLoan(loanPos, 1) / vRes(i, 3))
                vRes(i, 6) = vRes(i, 3) * vRes(i, 7)
            End If
        Next i
    End With

    Set rDest = rDest.Resize(rowsize:=UBound(vRes), columnsize:=UBound(vRes, 2))

    Application.ScreenUpdating = False
    rDest.EntireColumn.Clear
    rDest = vRes
    rDest.Columns(7).NumberFormat = "0.0"
    rDest.EntireColumn.AutoFit
    Application.ScreenUpdating = True

End Sub

答案 1 :(得分:1)

读取样本数据集:

# Sample portfolio, one row per (loan account, pledged security) pair.
# The header line has one field fewer than the data lines, so read.table
# uses the first field of each data line as the row name.
portfolioOne <- read.table(
    header = TRUE,
    text = "
  cAID cID cLoan sID sAmount
1   10  50   200 100    1100
2   11  50  1000 100    1100
3   12  60  1000 110     900
4   12  60  1000 120     300
5   14  80  1000 140    1200
6   15  90  1000 150     900
"
)

从您的示例可以看出,我们并不关心某笔贷款具体由哪一项担保品覆盖。因此,先创建一列,记录每个客户(cID)名下担保品的总额;然后对贷款金额做累计求和,并用担保总额减去它,得到一个滚动余额,从而判断哪些贷款被覆盖、哪些未被覆盖。

请注意,我通过使用duplicated函数查找冗余行来避免重复计算金额。对于保证金,我按子集排除了它们。对于贷款,我将冗余条目设置为0,以便累积金额达到正确的长度。

data.table解决方案:

library(data.table)
dat <- data.table(portfolioOne)

# Total security per client, counting each security (sID) only once.
dat[, sTotal := sum(sAmount[!duplicated(sID)]), by = cID]
# Running loan total per client; repeated rows of the same account (cAID)
# contribute 0 so a loan is not counted twice.
dat[, cLoanCum := cumsum(replace(cLoan, duplicated(cAID), 0)), by = cID]
# Security left after this loan and all earlier loans of the client.
dat[, balance := sTotal - cLoanCum]
# Fully covered when the balance is non-negative; otherwise only the part of
# the loan not eaten by the shortfall (never below 0).
dat[, assignedAmount := ifelse(balance >= 0, cLoan, pmax(0, cLoan + balance))]
dat[, CoverageRatio := assignedAmount / cLoan]

## delete intermediate columns if desired
helperCols <- c("sTotal", "cLoanCum", "balance")
dat[, (helperCols) := NULL]
dat
   # cAID cID cLoan sID sAmount assignedAmount CoverageRatio
# 1:   10  50   200 100    1100            200           1.0
# 2:   11  50  1000 100    1100            900           0.9
# 3:   12  60  1000 110     900           1000           1.0
# 4:   12  60  1000 120     300           1000           1.0
# 5:   14  80  1000 140    1200           1000           1.0
# 6:   15  90  1000 150     900            900           0.9

plyr解决方案:

library(plyr)
# Process each client's rows (cID) separately and append the two result
# columns; the intermediate values stay local to the function, so no helper
# columns need to be dropped afterwards.
dat <- ddply(portfolioOne, .(cID), function(grp) {
    # total security of the client, each security (sID) counted once
    secTotal <- sum(grp$sAmount[!duplicated(grp$sID)])
    # running loan total; repeated rows of the same account count as 0
    loanCum <- cumsum(replace(grp$cLoan, duplicated(grp$cAID), 0))
    balance <- secTotal - loanCum
    grp$assignedAmount <- ifelse(balance >= 0, grp$cLoan,
                                 pmax(0, grp$cLoan + balance))
    grp$CoverageRatio <- grp$assignedAmount / grp$cLoan
    grp
})

使用基础R的解决方案:

# Split by client, compute the assignment per client, then stack the pieces.
byClient <- split(portfolioOne, portfolioOne$cID)
byClient <- lapply(byClient, function(grp) {
    # total security of the client, each security (sID) counted once
    secTotal <- sum(grp$sAmount[!duplicated(grp$sID)])
    # running loan total; repeated rows of the same account count as 0
    loanCum <- cumsum(replace(grp$cLoan, duplicated(grp$cAID), 0))
    balance <- secTotal - loanCum
    grp$assignedAmount <- ifelse(balance >= 0, grp$cLoan,
                                 pmax(0, grp$cLoan + balance))
    grp$CoverageRatio <- grp$assignedAmount / grp$cLoan
    grp
})
dat <- do.call(rbind, byClient)
# keep the original columns followed by the two computed ones
keepCols <- c(names(portfolioOne), "assignedAmount", "CoverageRatio")
dat <- dat[, keepCols]