按唯一标识符折叠多行:有多个变量(列),且每个变量可能包含多个元素(行)

时间:2018-11-04 21:08:56

标签: r aggregate delimiter-separated-values

我正在尝试在R中按唯一标识符折叠多行,但每一列(变量)都可能包含多个值,我希望这些值各自保留在单独的行中。应该怎么做?我已经尝试过“separate_rows”函数,但它报错说每个嵌套列必须具有相同数量的元素,而我的数据并不满足这一点。

/**
 * Shows a formula in the given WebView with the help of MathJax.
 *
 * A page-finished callback is registered first. Once the small bootstrap page
 * built below has loaded, the callback writes the escaped formula markup into
 * the element with id "math" and queues a MathJax typeset pass.
 *
 * @param formulaML      formula markup to display; it is passed through
 *                       doubleEscapeTeX before being injected into the page
 * @param formulaWebview the WebView that hosts the bootstrap page
 */
private void initiateWebView(String formulaML, WebView formulaWebview){
    final String mathJaxOfflineUrl = "file:///android_asset/MathJax/MathJax.js";

    WebViewClient typesetWhenLoaded = new WebViewClient() {
        @Override
        public void onPageFinished(WebView view, String url) {
            super.onPageFinished(view, url);
            // Step 1: place the formula inside the placeholder span.
            String injectFormula = "javascript:document.getElementById('math').innerHTML='"
                    + doubleEscapeTeX(formulaML) + "';";
            view.loadUrl(injectFormula);
            // Step 2: ask MathJax to typeset the freshly inserted content.
            view.loadUrl("javascript:MathJax.Hub.Queue(['Typeset',MathJax.Hub]);");
        }
    };
    formulaWebview.setWebViewClient(typesetWhenLoaded);

    // MathJax configuration: MathML input rendered through HTML-CSS
    // (output/SVG was noted as the alternative output jax). An AsciiMath setup
    // (input/AsciiMath with asciimath2jax.js) was previously sketched here as
    // another option and is not active.
    String mathJaxConfig = "<script type='text/x-mathjax-config'>"
            + "MathJax.Hub.Config({ "
            + "showMathMenu: false, "
            + "jax: ['input/MathML','output/HTML-CSS'], "
            + "extensions: ['mml2jax.js'], "
            + "TeX: { extensions: ['noErrors.js','noUndefined.js'] }, "
            + "});</script>";

    // Script tag that loads MathJax itself, followed by the span that receives the formula.
    String mathJaxLoader = "<script type='text/javascript' "
            + "src='" + mathJaxOfflineUrl + "'"
            + "></script><span id='math'></span>";

    formulaWebview.loadDataWithBaseURL(
            "http://bar/", mathJaxConfig + mathJaxLoader, "text/html", "utf-8", "");
}

要从上面的Example1转到Example2,我使用了以下代码(需要plyr或dplyr)...

## Example1 start
# Long-format sample data. Each vector is one input row in the order
# identifier, var1, var2, var3; a blank cell is stored as "".
# sample_A.1 carries the var1 value 30 so that the rows agree with the table
# printed below this block and with the collapsed Example2 data.
sample_A.1 <- c("sample_A", "30", "", "")
sample_A.2 <- c("sample_A", "", "D", "")
sample_A.3 <- c("sample_A", "", "", "red")
sample_B.1 <- c("sample_B", "28", "", "")
sample_B.2 <- c("sample_B", "", "D", "")
sample_B.3 <- c("sample_B", "", "N", "")
sample_B.4 <- c("sample_B", "", "", "orange")
sample_C.1 <- c("sample_C", "27", "", "")
sample_C.2 <- c("sample_C", "32", "", "")
sample_C.3 <- c("sample_C", "", "E", "")
sample_C.4 <- c("sample_C", "", "", "orange")
sample_C.5 <- c("sample_C", "", "", "yellow")
sample_C.6 <- c("sample_C", "", "", "green")

# Stack the rows into a character data frame; the row names are taken from
# the vector names (sample_A.1, sample_A.2, ...).
my.data <- data.frame(
  rbind(
    sample_A.1, sample_A.2, sample_A.3,
    sample_B.1, sample_B.2, sample_B.3, sample_B.4,
    sample_C.1, sample_C.2, sample_C.3, sample_C.4, sample_C.5, sample_C.6
  ),
  stringsAsFactors = FALSE
)
colnames(my.data) <- c("identifier", "var1", "var2", "var3")

# identifier   var1   var2   var3
# sample_A     30
# sample_A            D
# sample_A                   red
# sample_B     28
# sample_B            D
# sample_B            N
# sample_B                   orange
# sample_C     27
# sample_C     32
# sample_C            E
# sample_C                   orange
# sample_C                   yellow
# sample_C                   green

## Or

## Example2 start
# Collapsed sample data: one row per identifier, with multiple values of a
# variable stored in a single cell as a ", "-separated string.
sample_A <- c("sample_A", "30", "D", "red")
sample_B <- c("sample_B", "28", "D, N", "orange")
sample_C <- c("sample_C", "27, 32", "E", "orange, yellow, green")

# Bind the three rows (row names come from the vector names) and label the
# columns in one step.
my.data2 <- setNames(
  data.frame(rbind(sample_A, sample_B, sample_C), stringsAsFactors = FALSE),
  c("identifier", "var1", "var2", "var3")
)

# identifier   var1   var2   var3
# sample_A     30     D      red
# sample_B     28     D, N   orange
# sample_C     27, 32 E      orange, yellow, green

我尝试使用以下代码将my.data2中的数据分解为单独的行...(需要tidyr和tibble)

library(dplyr)

# Collapse my.data to one row per identifier: within each group, every other
# column keeps its non-blank values, joined with ", ".
# Blank cells in my.data are empty strings, so they are dropped with nzchar();
# the is.na() guard additionally covers inputs that use NA for missing cells.
# Joining with ", " directly (instead of joining with spaces and substituting
# afterwards) keeps values that themselves contain spaces intact.
# The function is handed to summarize_all() as-is rather than through the
# deprecated funs() wrapper.
my.data2a <- my.data %>%
  group_by(identifier) %>%
  summarize_all(function(x) {
    keep <- !is.na(x) & nzchar(trimws(x))
    paste(x[keep], collapse = ", ")
  })

# Plain data.frame copy with surrounding whitespace trimmed from every column.
my.data2 <- data.frame(lapply(my.data2a, trimws), stringsAsFactors = FALSE)

但是我得到了错误:“错误:所有嵌套列必须具有相同数量的元素。”我需要得到上方“示例输出”链接中“结尾为”的那张表,其内容基本如下所示:

library(tidyr)  # provides separate_rows()

# Columns of my.data2 whose cells hold delimiter-separated values.
data2colnames <- c("var1", "var2", "var3")

# all_of() marks data2colnames as an external character vector of column
# names rather than a column of my.data2.
# NOTE: all selected columns are split in the same call, so within a row they
# have to yield matching numbers of pieces; for data where the counts differ
# (as reported for this example) the call stops with an error.
my.data2 %>% separate_rows(all_of(data2colnames))

1 个答案:

答案 0 :(得分:0)

我试了一个比较丑陋的办法,肯定还有更优雅的写法。

library(purrr)

# Work on a copy and turn blank cells into NA so they can be dropped below.
# The !is.na() guard keeps the logical index free of NA if the input already
# contains missing cells.
df <- my.data
df[!is.na(df) & df == ""] <- NA

# One entry per identifier. Each entry is a list with, for every column, the
# distinct non-missing values in decreasing order (sort() drops NA).
# lapply() is used so the per-group result is always a list, whatever the
# number of values each column contributes.
x <- df %>%
  split(.$identifier) %>%
  map(function(grp) {
    lapply(grp, function(col) sort(unique(col), decreasing = TRUE))
  })

# Pad every column of a group to the length of its longest column and bind the
# groups row-wise. The identifier column (recognised by its name) is padded by
# repeating the identifier; all other columns are padded with "".
xx <- map_dfr(x, function(cols) {
  n_rows <- max(lengths(cols))
  Map(
    function(values, col_name) {
      filler <- if (col_name == "identifier") values[1] else ""
      c(values, rep(filler, n_rows - length(values)))
    },
    cols,
    names(cols)
  )
}) %>%
  as.data.frame()

# Identifiers that now span several rows get a running suffix (.1, .2, ...);
# identifiers with a single row are left unchanged. ave() writes each result
# back to the rows of its own group.
nm <- xx$identifier
xx$identifier <- ave(nm, nm, FUN = function(ids) {
  if (length(ids) > 1) paste0(ids, ".", seq_along(ids)) else ids
})

xx