计算“严重”分区数据集中的组差异

时间:2018-11-29 14:07:48

标签: r dplyr tidyr

我尝试通过SO的问题解决问题,但找不到满意的答案。我的数据框具有结构

Sub ConvertTDMStoCSV()
'
' ConvertTDMS Macro
'
' Acts upon all .tdms files in a "source" directory,
' loading each one using the ExcelTDM Add In,
' deleting the first sheet and saving the
' remaining stream data as one .csv file
' in a "target" directory.  Writes a list of
' the files converted in a new sheet.
'
Dim sourceDir As String, targetDir As String, fn As String, fnBase As String
Dim fso As Object, n As Long, resp As Integer, strNow As String, newSheet As Object
Dim tdmsAddIn As COMAddIn, importedWorkbook As Object
Set fso = CreateObject("Scripting.FileSystemObject")

Set tdmsAddIn = Application.COMAddIns.Item("ExcelTDM.TDMAddin")
tdmsAddIn.Connect = True
Call tdmsAddIn.Object.Config.RootProperties.DeselectAll
Call tdmsAddIn.Object.Config.ChannelProperties.DeselectAll
tdmsAddIn.Object.Config.RootProperties.SelectCustomProperties = False
tdmsAddIn.Object.Config.GroupProperties.SelectCustomProperties = False
tdmsAddIn.Object.Config.ChannelProperties.SelectCustomProperties = False


'Choose TDMS Source Directory
With Application.FileDialog(msoFileDialogFolderPicker)
    .Title = "Choose Source Directory of TDMS Files"
    .AllowMultiSelect = False
    .InitialFileName = ThisWorkbook.Path & "\"
    .Show
    On Error Resume Next
    sourceDir = .SelectedItems(1)
    Err.Clear
    On Error GoTo 0
End With
If Dir(sourceDir, vbDirectory) = "" Then
    MsgBox "No such folder.", vbCritical, sourceDir
    Exit Sub
End If

'Choose CSV Target Directory
With Application.FileDialog(msoFileDialogFolderPicker)
    .Title = "Choose Target Directory for CSV Files"
    .AllowMultiSelect = False
    .InitialFileName = ThisWorkbook.Path & "\"
    .Show
    On Error Resume Next
    targetDir = .SelectedItems(1)
    Err.Clear
    On Error GoTo 0
End With
If Dir(targetDir, vbDirectory) = "" Then
    MsgBox "No such folder.", vbCritical, targetDir
    Exit Sub
End If



fn = Dir(sourceDir & "\*.tdms")
If fn = "" Then
    MsgBox "No source TDMS files found.", vbInformation
    Exit Sub
End If

resp = MsgBox("Begin conversion of TDMS files?" & vbCrLf & sourceDir & vbCrLf & "to" & vbCrLf & targetDir, vbYesNo, "Confirmation")
If resp = vbNo Then
    MsgBox "Execution cancelled by user."
    Exit Sub
End If

Set newSheet = ThisWorkbook.Sheets.Add(After:=ThisWorkbook.Sheets(ThisWorkbook.Sheets.Count))
strNow = WorksheetFunction.Text(Now(), "m-d-yyyy h_mm_ss")
newSheet.Name = strNow
newSheet.Cells(1, 1).Value = "Files converted on " & strNow
newSheet.Cells(2, 1).Value = "TDMS Source Directory: " & sourceDir
newSheet.Cells(3, 1).Value = "CSV Target Directory: " & targetDir


Application.Calculation = xlCalculationManual
Application.ScreenUpdating = False
n = 5
Do While fn <> ""
    fnBase = fso.GetBaseName(fn)

    On Error Resume Next
    Call tdmsAddIn.Object.ImportFile(sourceDir & "\" & fn, True)
    If Err Then
        MsgBox Err.Description, vbCritical
        Exit Sub
    End If
    Set importedWorkbook = ActiveWorkbook
    Application.DisplayAlerts = False
    importedWorkbook.Sheets(1).Delete
    importedWorkbook.SaveAs Filename:=targetDir & "\" & fnBase & ".csv", FileFormat:=xlCSV
    importedWorkbook.Close savechanges:=False
    Application.DisplayAlerts = True

    newSheet.Cells(n, 1).Value = fnBase
    n = n + 1
    fn = Dir
Loop
Application.Calculation = xlCalculationAutomatic
Application.ScreenUpdating = True


Set fso = Nothing
Set newSheet = Nothing
Set importedWorkbook = Nothing
End Sub

看起来像

X = data_frame(
        treat = c(rep(1,4), rep(2,4), rep(3,4), rep(4,4)),
        id = seq(1:16), 
        x = rnorm(16), 
        y = rnorm(16), 
        z = rnorm(16)
    )

# A tibble: 16 x 5 treat id x y z <int> <int> <dbl> <dbl> <dbl> 1 1 1 -0.0724 1.26 0.317 2 1 2 -0.486 -0.628 0.392 3 1 3 -0.406 -0.706 1.18 4 1 4 -1.35 -1.27 2.36 5 2 5 -0.0751 -0.0394 0.568 6 2 6 0.243 0.873 0.132 7 2 7 0.138 0.611 -0.700 8 2 8 -0.732 1.02 -0.811 9 3 9 -0.0278 1.78 0.568 10 3 10 0.526 1.18 1.03 11 3 11 1.43 0.0937 -0.0825 12 3 12 -0.299 -0.117 0.367 13 4 13 1.05 2.04 0.678 14 4 14 -1.93 0.201 0.250 15 4 15 0.624 1.09 0.852 16 4 16 0.502 0.119 -0.843 中的第四个值是一个控件,现在我要计算处理与控件之间的treatxy之间的差异。例如,我想计算第一种治疗方法

z

第一次治疗。对于第二种治疗方法,

-0.724 - (-1.35) #x
1.26 - (-1.27)   #y
0.317 - 2.36     #z

...等等。

我想使用-0.486 - (-1.35) #x -0.628 - (-1.27) #y 0.392 - 2.36 #z / dplyr解决方案,但我不知道如何以“流畅”的方式进行操作。我已经通过使用联接找到了一个解决方案,但与通常提供的“平滑”解决方案tidyverse相比,这似乎很乏味。

1 个答案:

答案 0 :(得分:2)

使用dplyr,我们可以group_by treat并使用mutate_at选择特定列(xz)并减去每个值使用nth函数获得第4个值。

library(dplyr)
X %>%
  group_by(treat) %>%
  mutate_at(vars(x:z), funs(. - nth(., 4)))


#treat    id      x      y       z
#   <dbl> <int>  <dbl>  <dbl>   <dbl>
# 1     1     1 -0.631  0.971  0.206 
# 2     1     2 -0.301 -1.49   0.189 
# 3     1     3  1.49   1.17   0.133 
# 4     1     4  0      0      0     
# 5     2     5  1.39  -0.339  0.934 
# 6     2     6  2.98   0.511  0.319 
# 7     2     7  1.73  -0.297  0.0745
# 8     2     8  0      0      0     
# 9     3     9 -1.05  -0.778 -2.86  
#10     3    10 -0.805 -1.84  -2.38  
#11     3    11  0.864  0.684 -3.43  
#12     3    12  0      0      0     
#13     4    13 -1.39  -0.843  1.67  
#14     4    14 -1.68   1.55  -0.656 
#15     4    15 -2.34   0.722  0.0638
#16     4    16  0      0      0     

这也可以写为

X %>%
  group_by(treat) %>%
  mutate_at(vars(x:z), funs(. - .[4]))

数据

set.seed(123)
X = data_frame(
   treat = c(rep(1,4), rep(2,4), rep(3,4), rep(4,4)),
   id = seq(1:16), 
   x = rnorm(16), 
   y = rnorm(16), 
   z = rnorm(16)
)