我的数据框看起来像:
# Cross-tabulate part numbers against worker names: one count per (part, person) pair.
table(df$partnumber, df$personworking)
……依此类推,共有数千行。同一个零件可以分配给多个人。我不太擅长在 R 中汇总数据,但我能够通过在控制台中键入以下内容来生成一个看起来像频率矩阵的表:
Part Number NumWorkers Names
A 3 "James, Andrea"
B 1 "Brian"
C 1 "Tiffany"
它以每个唯一的零件作为行、每位工作人员的姓名作为列输出。值为 0 或 1,取决于该人员是否正在处理该零件。
我想要的是一种按零件、以易于理解的格式汇总这些信息的方法,例如:
# Attempt to turn the table into a data frame by passing all of its rows and columns ([,]).
thedataframe <- data.frame(thetable[,])
我也在尝试把这个表(table)转换成数据框(data frame)。我试过:
' Lets the user pick a workbook, copies selected cells into the form's text boxes,
' reads the machine dimensions from SQL Server, then post-processes the text boxes in Panel1.
' NOTE(review): APP, workbook, worksheet and the named controls (TextBox1, l1, adding, press, ...)
' are not declared in this fragment - presumably form-level members; confirm.
Dim fd As OpenFileDialog = New OpenFileDialog()
fd.Title = "Open File Dialog"
fd.InitialDirectory = "C:\"
' NOTE(review): both filter entries are identical, and they are labelled "All files"
' although they only match *.xlsx.
fd.Filter = "All files (*.xlsx)|*.xlsx|All files (*.xlsx)|*.xlsx"
' Pre-selects the second entry of the filter list above.
fd.FilterIndex = 2
fd.RestoreDirectory = True
Dim myStream As Stream = Nothing
If fd.ShowDialog() = DialogResult.OK Then
Try
' The stream is only used as an "is the file readable" check and is closed in Finally;
' the workbook itself is opened separately by file name below.
myStream = fd.OpenFile()
If (myStream IsNot Nothing) Then
workbook = APP.Workbooks.Open(fd.FileName)
worksheet = workbook.Worksheets("sheet1")
' Header values come from column 7, rows 1-3.
TextBox1.Text = worksheet.Cells(1, 7).Value
TextBox2.Text = worksheet.Cells(2, 7).Value
TextBox3.Text = worksheet.Cells(3, 7).Value
' Row 2, columns 1-4 feed l1/w1/q1/p1; the first two are scaled by 10.
' NOTE(review): the factor 10 looks like a unit conversion - confirm.
l1.Text = worksheet.Cells(2, 1).Value * 10
w1.Text = worksheet.Cells(2, 2).Value * 10
q1.Text = worksheet.Cells(2, 3).Value
' NOTE(review): the trailing dots on the next line are an elision in the snippet
' (further assignments omitted); they are not valid VB.
p1.Text = worksheet.Cells(2, 4).Value..........
' VB array bounds are inclusive upper bounds, so this is a 120 x 4 array.
Dim Values(119, 3) As String
Values(0, 0) = l1.Text
Values(0, 1) = w1.Text
Values(0, 2) = q1.Text
' NOTE(review): trailing dots again mark omitted assignments, not valid VB.
Values(0, 3) = p1.Text.........
Dim add As Integer = 0
Dim pressing As Integer = 0
' add defaults to 50 when the box is empty (or already "50"); otherwise the text is parsed.
' Convert.ToInt16 throws on non-numeric text; that exception is caught by the Catch below
' and reported as a file-read error.
If adding.Text = "50" Or adding.Text = "" Then
add = 50
Else
add = Convert.ToInt16(adding.Text)
End If
' Same pattern for pressing, with a default of 20.
If press.Text = "20" Or press.Text = "" Then
pressing = 20
Else
pressing = Convert.ToInt16(press.Text)
End If
Dim l As Integer
Dim w As Integer
Dim machinearea As Integer
Dim connetionString As String
Dim cnn As SqlConnection
' NOTE(review): credentials for the sa account are hard-coded in the connection string.
connetionString = "Data Source=.;Initial Catalog=lumber;User ID=sa;Password=sasql"
cnn = New SqlConnection(connetionString)
Dim cmd As SqlCommand
Dim myreader As SqlDataReader
Dim query As String
query = "SELECT length,width from marea"
cmd = New SqlCommand(query, cnn)
cnn.Open()
myreader = cmd.ExecuteReader()
' Only the first row returned by the query is read; l and w stay 0 if there is none.
If myreader.Read() Then
l = myreader.Item("length")
w = myreader.Item("width")
End If
' NOTE(review): the reader is never closed explicitly, and this Close is skipped
' if Open/ExecuteReader/Read throws.
cnn.Close()
' machinearea is computed here but not used in the visible fragment.
machinearea = l * w
' First pass: walk Panel1's text boxes in tab order and blank any that contain exactly "0".
Dim allTextBoxes2 = From txt In Me.Panel1.Controls.OfType(Of TextBox)()
Order By txt.TabIndex
Dim txtList2 = allTextBoxes2.ToList()
For i As Int32 = 0 To txtList2.Count - 1
Dim thisTxt = txtList2(i)
' nextIndex/prevIndex wrap around the list; they, nextTxt, prevTxt and testInt
' are computed but never used in this loop.
Dim nextIndex = If(i + 1 >= txtList2.Count, 0, i + 1)
Dim prevIndex = If(i - 1 < 0, txtList2.Count - 1, i - 1)
Dim nextTxt = txtList2(nextIndex)
Dim prevTxt = txtList2(prevIndex)
Dim testInt As Integer = 0
If thisTxt.Text = "0" Then
thisTxt.Clear()
End If
Next
' Second pass over the same text boxes, intended to fill them from worksheet cells.
Dim allTextBoxes = From txt In Me.Panel1.Controls.OfType(Of TextBox)()
Order By txt.TabIndex
Dim txtList = allTextBoxes.ToList()
For i As Int32 = 0 To txtList.Count - 1
Dim thisTxt = txtList(i)
' NOTE(review): with no Step clause the step is +1, so "2 To -1" never executes
' and no text box is filled. Even if it ran, each inner iteration overwrites
' thisTxt.Text, so only the last cell value would remain.
For j = 2 To -1
For k = 1 To 4
thisTxt.Text = worksheet.Cells(i + j, k).Value
Next
Next
Next
End If
' Any exception raised above (file, Excel, parsing or SQL) is reported with this message.
Catch Ex As Exception
MessageBox.Show("Cannot read file from disk. Original error: " & Ex.Message)
Finally
' Check this again, since we need to make sure we didn't throw an exception on open.
If (myStream IsNot Nothing) Then
myStream.Close()
End If
End Try
End If
但我没有取得太大进展。我想统计每个唯一零件的工作人数,并把在该零件上取值为 1 的各列的列名连接起来打印出来。
在Base R 中汇总此数据的最佳方法是什么?
答案 0 :(得分:0)
我们可以使用 data.table。将 'data.frame' 转换为 'data.table'(setDT(df)),按 'partnumber' 分组,获取行数(.N),并在每个 'partnumber' 内 paste 'personworking'。
library(data.table)
# setDT() converts df to a data.table in place, so df can be queried directly afterwards.
setDT(df)
# One row per part number: .N counts the rows, toString() joins the worker names with ", ".
df[, list(NumWorkers = .N, Names = toString(personworking)), by = partnumber]
或我们可以使用dplyr
library(dplyr)
# Group the rows by part number, then collapse each group to a single summary row:
# n() counts the rows, toString() joins the worker names with ", ".
summarise(
  group_by(df, partnumber),
  NumWorkers = n(),
  Names = toString(personworking)
)
或使用base R
# Summarise the rows of one part: how many workers there are, and their names joined by ", ".
summarise_part <- function(rows) {
  workers <- rows$personworking
  data.frame(NumWorkers = length(workers), Names = toString(workers))
}
# by() applies the summary to each part number's rows; rbind stacks the one-row results.
do.call(rbind, by(df, df$partnumber, FUN = summarise_part))
答案 1 :(得分:0)
以下是在 base R 中使用 aggregate 的方法:
# Summarise df per part: the number of people assigned and their names joined by ", ".
# FUN returns both values as one vector, so aggregate() yields a two-column matrix that
# do.call(data.frame, ...) spreads into separate columns.
dfAgg <- do.call(data.frame,
                 aggregate(df$Person, list(df$Parts),
                           FUN = function(x) c(length(x), paste(x, collapse = ", "))))
# add nicer names
names(dfAgg) <- c("Parts", "Count", "Person")
# c() above mixes a number with a string, so Count comes back as text (or as a factor
# on R < 4.0); convert it back to an integer so it can be summed or sorted numerically.
dfAgg$Count <- as.integer(as.character(dfAgg$Count))
aggregate 允许您对每个分组运行一个函数。在这个例子中,我们运行的函数返回每组的人数(通过 length)以及他们的姓名(通过 paste)。
以下是我用来测试它的示例数据。
数据
# Reproducible sample data: 10 rows, each pairing a randomly drawn part (A-C)
# with a randomly drawn person. Both columns are kept as character vectors.
set.seed(1234)
df <- data.frame(
  Parts = sample(LETTERS[1:3], size = 10, replace = TRUE),
  Person = sample(c("James", "Brian", "Sam", "Tiff", "Sandy"),
                  size = 10, replace = TRUE),
  stringsAsFactors = FALSE
)