我有两个电子表格,如下所示: 我知道如果我从Spreadsheet2开始,我可以只是vlookup并获取值。 但我需要从Spreadsheet1开始。我需要添加行。这是一个很小的数据,但我实际拥有的是巨大的......(超过20000行)。
Spreadsheet1:
Category Type NumItem
Air B747 10
Ground TBus1 15
Air B777 20
Air A380 5
Spreadsheet2:
Type TypeElement NumEngine
B747 747T1 2
B747 747T2 4
B747 747T3 8
Tbus1 TbusT1 0
B777 777T1 6
B777 777T2 4
A380 380T1 10
我想将这些内容合并到电子表格中。正如你可以看到两个匹配的类型,但对于每种类型,我有多个“TypeElement”。
我希望它看起来像
Category Type NumItem TypeElement NumEngine
Air B747 10 747T1 2
Air B747 10 747T2 4
Air B747 10 747T3 8
Ground TBus1 15 TbusT1 0
Air B777 20 777T1 6
Air B777 20 777T2 4
Air A380 5 380T1 10
这可以使用Excel中的函数来完成吗?我是否必须使用VBA / Macro? 如果有人知道如何使用R来完成,请评论我应该使用哪些公式/包。
谢谢!
答案 0 :(得分:2)
正如@ r-schifini所提到的,您可以使用几个库来导入Excel文件。在这里,我使用readxl
包。要保留第一个电子表格中的所有行(您的电子表格1),请在all.x=TRUE
函数中指定merge
。有关详细信息,请参阅?merge
。请注意,我已在Spreadsheet1中添加了一行,其中包含B700
类型的假数据。
library(readxl)
ss1 <- read_excel(path = "spreadsheet1.xlsx", sheet = 1)
ss2 <- read_excel(path = "spreadsheet2.xlsx", sheet = 1)
out <- merge(ss1, ss2, all.x=TRUE)
out
# Type Category NumItem TypeElement NumEngine
# 1 A380 Air 5 380T1 10
# 2 B700 Air 8 <NA> NA
# 3 B747 Air 10 747T1 2
# 4 B747 Air 10 747T2 4
# 5 B747 Air 10 747T3 8
# 6 B777 Air 20 777T1 6
# 7 B777 Air 20 777T2 4
# 8 TBus1 Ground 15 <NA> NA
为什么第8行有NA
个?这是因为您的类型在Spreadsheet1中为TBus1
,在Spreadsheet2中为Tbus1
。为了避免这样的问题,我们可以在合并之前将案例更改为高级。
ss1$Type <- toupper(ss1$Type)
ss2$Type <- toupper(ss2$Type)
out <- merge(ss1, ss2, all.x=TRUE)
out
# Type Category NumItem TypeElement NumEngine
# 1 A380 Air 5 380T1 10
# 2 B700 Air 8 <NA> NA
# 3 B747 Air 10 747T1 2
# 4 B747 Air 10 747T2 4
# 5 B747 Air 10 747T3 8
# 6 B777 Air 20 777T1 6
# 7 B777 Air 20 777T2 4
# 8 TBUS1 Ground 15 TbusT1 0
答案 1 :(得分:0)
一些想法 1)您可以尝试组合行并使用sort函数,按元素项中的升序或降序值对值进行排序。例如,进行自定义排序或过滤。
2)您必须选择如何对值进行分类。按类别?按类型?有一些方法可以将这些项目本地分组。设置变量之间的链接。
答案 2 :(得分:0)
我使用VBA将您的tb1和tb2移动到Access文件(c:\ testdb.mdb)。 然后使用SQL命令加入它们
Sub Main()
Dim adoxCat As Object, adoConn As Object, adoRst As Object, var As Variant, strSQL As String
Dim i As Long
'make an empty mdb file'
If Dir("C:\testdb.mdb") = "" Then
Set adoxCat = CreateObject("ADOX.catalog")
adoxCat.Create "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=C:\testdb.mdb;"
Set adoxCat = Nothing
Else
MsgBox "C:\testdb.mdb is existed.", vbCritical
Exit Sub
End If
'create an ADO connection'
On Error Resume Next
Set adoConn = CreateObject("adodb.connection")
With adoConn
.Open "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=C:\testdb.mdb;"
If .State 1 Then
MsgBox "Cannot create ADO Connection.", vbCritical
Set adoConn = Nothing
Exit Sub
End If
End With
'create two Tables in the mdb file.'
With adoConn
.Execute "CREATE TABLE tb_1 (Category varchar, Type varchar, NumItem number)"
.Execute "CREATE TABLE tb_2 (Type varchar, TypeElement varchar, NumEngine number)"
'move data in excel to mdb file'
var = toArray(Worksheets(1))
For i = LBound(var, 1) To UBound(var, 1)
strSQL = "INSERT INTO tb_1 (category, type, NumItem) VALUES("
strSQL = strSQL & " '" & var(i, 0) & "',"
strSQL = strSQL & " '" & var(i, 1) & "',"
strSQL = strSQL & " " & var(i, 2) & " );"
.Execute strSQL
Next i
var = toArray(Worksheets(2))
For i = LBound(var, 1) To UBound(var, 1)
strSQL = "INSERT INTO tb_2 (Type, TypeElement, NumEngine) VALUES("
strSQL = strSQL & " '" & var(i, 0) & "',"
strSQL = strSQL & " '" & var(i, 1) & "',"
strSQL = strSQL & " " & var(i, 2) & " );"
.Execute strSQL
Next i
'Use SQL Join statement to Join two tables'
strSQL = "SELECT * FROM tb_1 left join tb_2 on tb_1.type = tb_2.type;"
Set adoRst = .Execute(strSQL)
'output the result to excel worksheet(3)'
Worksheets(3).Range("A1").CopyFromRecordset adoRst
.Close
End With
Set adoConn = Nothing
'remove the mdb file'
Kill "c:\testdb.mdb"
End Sub
Function toArray(from_WSht As Worksheet) As Variant
Dim strPath As String, myRng As Range, rw As Range, c As Range
Dim i As Long, j As Long, dt As Variant
Set myRng = from_WSht.Range("a1").CurrentRegion
If not myRng.Rows.Count > 1 Then GoTo errHdr
ReDim dt(myRng.Rows.Count - 1, myRng.Columns.Count - 1) As Variant
i = 0
For Each rw In myRng.Rows
If rw.Row > 1 Then
j = 0
For Each c In rw.Cells
dt(i, j) = c.Value
j = j + 1
Next c
i = i + 1
End If
Next rw
toArray = dt
Exit Function
errHdr:
toArray = 0
End Function