Excel vlookup多个值并添加重复的行

时间:2016-08-15 00:31:49

标签: r excel excel-vba vlookup vba

我有两个电子表格,如下所示。我知道如果从Spreadsheet2出发,只需用vlookup就能取到对应的值。但我需要从Spreadsheet1出发,并且需要为每个匹配项添加行。下面只是很小的示例数据,我实际的数据量非常大(超过20000行)。

Spreadsheet1:

Category    Type    NumItem
Air         B747    10
Ground      TBus1   15
Air         B777    20
Air         A380    5

Spreadsheet2:

Type    TypeElement    NumEngine
B747    747T1          2
B747    747T2          4
B747    747T3          8
Tbus1   TbusT1         0
B777    777T1          6
B777    777T2          4
A380    380T1          10

我想将这些内容合并到电子表格中。正如你可以看到两个匹配的类型,但对于每种类型,我有多个“TypeElement”。

我希望它看起来像

Category    Type    NumItem    TypeElement    NumEngine
Air         B747    10         747T1          2
Air         B747    10         747T2          4
Air         B747    10         747T3          8
Ground      TBus1   15         TbusT1         0
Air         B777    20         777T1          6
Air         B777    20         777T2          4
Air         A380    5          380T1          10

这可以使用Excel中的函数来完成吗?我是否必须使用VBA / Macro? 如果有人知道如何使用R来完成,请评论我应该使用哪些公式/包。

谢谢!

3 个答案:

答案 0 :(得分:2)

正如@r-schifini所提到的,您可以使用几个库来导入Excel文件。在这里,我使用readxl包。要保留第一个电子表格(您的Spreadsheet1)中的所有行,请在merge函数中指定all.x=TRUE。有关详细信息,请参阅?merge。请注意,我已在Spreadsheet1中添加了一行类型为B700的假数据。

library(readxl)

# Read the first sheet of each workbook.
ss1 <- read_excel(path = "spreadsheet1.xlsx", sheet = 1)
ss2 <- read_excel(path = "spreadsheet2.xlsx", sheet = 1)

# Left join on the shared key column. Naming the key explicitly (by = "Type")
# keeps the join from silently widening to every column name the two sheets
# happen to share. all.x = TRUE keeps every row of ss1, filling NA where ss2
# has no matching Type; a Type with several rows in ss2 yields several rows.
out <- merge(ss1, ss2, by = "Type", all.x = TRUE)
out
#    Type Category NumItem TypeElement NumEngine
# 1  A380      Air       5       380T1        10
# 2  B700      Air       8        <NA>        NA
# 3  B747      Air      10       747T1         2
# 4  B747      Air      10       747T2         4
# 5  B747      Air      10       747T3         8
# 6  B777      Air      20       777T1         6
# 7  B777      Air      20       777T2         4
# 8 TBus1   Ground      15        <NA>        NA

为什么第8行是NA?这是因为您的类型在Spreadsheet1中为TBus1,而在Spreadsheet2中为Tbus1(大小写不同)。为了避免这样的问题,我们可以在合并之前把Type列统一转换为大写。

# Normalise the key's case on both sides so that values differing only in
# capitalisation (TBus1 vs Tbus1) match, then left-join on that key.
ss1$Type <- toupper(ss1$Type)
ss2$Type <- toupper(ss2$Type)
# Explicit key: do not rely on merge() guessing the join columns from the
# intersection of the two sets of column names.
out <- merge(ss1, ss2, by = "Type", all.x = TRUE)
out
#    Type Category NumItem TypeElement NumEngine
# 1  A380      Air       5       380T1        10
# 2  B700      Air       8        <NA>        NA
# 3  B747      Air      10       747T1         2
# 4  B747      Air      10       747T2         4
# 5  B747      Air      10       747T3         8
# 6  B777      Air      20       777T1         6
# 7  B777      Air      20       777T2         4
# 8 TBUS1   Ground      15      TbusT1         0

答案 1 :(得分:0)

一些想法 1)您可以尝试组合行并使用sort函数,按元素项中的升序或降序值对值进行排序。例如,进行自定义排序或过滤。

2)您必须选择如何对值进行分类。按类别?按类型?有一些方法可以将这些项目本地分组。设置变量之间的链接。

答案 2 :(得分:0)

我使用VBA将您的tb1和tb2导入到一个Access文件(c:\testdb.mdb)中,然后使用SQL命令将它们连接起来。


Sub Main()
    ' Left-joins the table on Worksheets(1) (Category, Type, NumItem) with the
    ' table on Worksheets(2) (Type, TypeElement, NumEngine) on the Type column
    ' and writes the result, with a header row, to Worksheets(3) from A1.
    '
    ' The join is done by loading both sheets into a temporary Jet database
    ' (C:\testdb.mdb) through ADO and running a SQL LEFT JOIN. The temporary
    ' file is removed again whether or not the join succeeds.
    Const DB_PATH As String = "C:\testdb.mdb"
    Const CONN_STR As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & DB_PATH & ";"
    Const AD_STATE_OPEN As Long = 1

    Dim adoxCat As Object, adoConn As Object, adoRst As Object
    Dim i As Long, connOpen As Boolean, errMsg As String

    'make an empty mdb file (refuse to overwrite an existing one)
    If Dir(DB_PATH) <> "" Then
        MsgBox "C:\testdb.mdb is existed.", vbCritical
        Exit Sub
    End If
    Set adoxCat = CreateObject("ADOX.catalog")
    adoxCat.Create CONN_STR
    Set adoxCat = Nothing

    'create an ADO connection
    ' Errors are suppressed only around the open attempt; the outcome is
    ' then checked explicitly through the connection state.
    On Error Resume Next
    Set adoConn = CreateObject("adodb.connection")
    adoConn.Open CONN_STR
    connOpen = (adoConn.State = AD_STATE_OPEN)
    On Error GoTo 0

    If Not connOpen Then
        MsgBox "Cannot create ADO Connection.", vbCritical
        Set adoConn = Nothing
        ' Do not leave the freshly created file behind: it would make the
        ' next run stop at the "is existed" check.
        On Error Resume Next
        Kill DB_PATH
        On Error GoTo 0
        Exit Sub
    End If

    ' From here on any failure is reported and still runs the clean-up.
    On Error GoTo Fail

    'create two Tables in the mdb file.
    adoConn.Execute "CREATE TABLE tb_1 (Category varchar, Type varchar, NumItem number)"
    adoConn.Execute "CREATE TABLE tb_2 (Type varchar, TypeElement varchar, NumEngine number)"

    'move data in excel to mdb file
    InsertRows adoConn, "INSERT INTO tb_1 (category, type, NumItem) VALUES(", toArray(Worksheets(1))
    InsertRows adoConn, "INSERT INTO tb_2 (Type, TypeElement, NumEngine) VALUES(", toArray(Worksheets(2))

    'Use SQL Join statement to Join two tables
    ' Columns are listed explicitly so the join key appears only once.
    Set adoRst = adoConn.Execute( _
        "SELECT tb_1.Category, tb_1.Type, tb_1.NumItem, tb_2.TypeElement, tb_2.NumEngine " & _
        "FROM tb_1 LEFT JOIN tb_2 ON tb_1.Type = tb_2.Type;")

    'output the result to excel worksheet(3)
    ' CopyFromRecordset writes data only, so the header row is written first.
    For i = 0 To adoRst.Fields.Count - 1
        Worksheets(3).Cells(1, i + 1).Value = adoRst.Fields(i).Name
    Next i
    Worksheets(3).Range("A2").CopyFromRecordset adoRst

CleanUp:
    ' Best-effort release of resources; failures here are not actionable.
    On Error Resume Next
    adoRst.Close
    adoConn.Close
    Set adoRst = Nothing
    Set adoConn = Nothing
    'remove the mdb file
    Kill DB_PATH
    On Error GoTo 0
    If Len(errMsg) > 0 Then MsgBox errMsg, vbCritical
    Exit Sub

Fail:
    ' Capture the error text before any further On Error statement clears it,
    ' then leave the handler via Resume so the clean-up can trap its own errors.
    errMsg = "Error " & Err.Number & ": " & Err.Description
    Resume CleanUp
End Sub

' Inserts every non-empty row of the 0-based 2-D array var (text, text, number)
' using insertPrefix as the "INSERT INTO ... VALUES(" part of the statement.
Private Sub InsertRows(adoConn As Object, insertPrefix As String, var As Variant)
    Dim i As Long

    ' toArray returns a non-array value when the sheet has no data rows.
    If Not IsArray(var) Then Exit Sub

    For i = LBound(var, 1) To UBound(var, 1)
        ' Skip rows that were allocated but never filled.
        If Not (IsEmpty(var(i, 0)) And IsEmpty(var(i, 1)) And IsEmpty(var(i, 2))) Then
            adoConn.Execute insertPrefix & _
                " " & SqlText(var(i, 0)) & "," & _
                " " & SqlText(var(i, 1)) & "," & _
                " " & SqlNumber(var(i, 2)) & " );"
        End If
    Next i
End Sub

' Returns v as a single-quoted SQL string literal with embedded quotes doubled.
Private Function SqlText(v As Variant) As String
    SqlText = "'" & Replace(CStr(v), "'", "''") & "'"
End Function

' Returns v as a SQL numeric literal, or NULL when v is blank or not numeric.
Private Function SqlNumber(v As Variant) As String
    If IsEmpty(v) Or Not IsNumeric(v) Then
        SqlNumber = "NULL"
    Else
        ' Str$ always uses "." as the decimal separator, whatever the locale.
        SqlNumber = Trim$(Str$(CDbl(v)))
    End If
End Function


Function toArray(from_WSht As Worksheet) As Variant
    ' Returns the data rows of the table that starts at A1 of from_WSht as a
    ' 0-based 2-D Variant array (row, column), excluding the header in row 1.
    ' Returns 0 (not an array) when the region around A1 has no data rows.
    Dim myRng As Range, src As Variant, dt As Variant
    Dim r As Long, c As Long

    Set myRng = from_WSht.Range("a1").CurrentRegion
    If Not myRng.Rows.Count > 1 Then GoTo errHdr

    ' One bulk read instead of one COM call per cell. The region has at least
    ' two rows here, so .Value is always a 1-based 2-D array.
    src = myRng.Value

    ' Rows.Count includes the header, so there are Rows.Count - 1 data rows
    ' and the last 0-based row index is Rows.Count - 2.
    ReDim dt(UBound(src, 1) - 2, UBound(src, 2) - 1) As Variant
    For r = 2 To UBound(src, 1)
        For c = 1 To UBound(src, 2)
            dt(r - 2, c - 1) = src(r, c)
        Next c
    Next r

    toArray = dt
    Exit Function
errHdr:
    toArray = 0
End Function

enter image description here