Question

问题：我需要读取Excel工作簿中单元格的批注（Notes / Comments），并把它们作为单独的数据列导入R。

Excel Note/Comment screenshot

我找到了tidyxl软件包，但在加载数据时无法让脚本正常工作。

我已经在GitHub上开了一个讨论帖，其中包含输入数据、R脚本以及我期望的最终结果：

Answer 1

xlsx软件包也支持这一点。

library(xlsx)
library(tidyverse)

# Load the workbook and collect every cell of its first sheet.
wb <- xlsx::loadWorkbook(file = "test_data_30oct2020 (1).xlsx")

sheets <- getSheets(wb)
sheet <- sheets[[1]]
rows <- getRows(sheet)
cells <- getCells(rows)

# getCellComment() is applied to every cell; NULL results (presumably cells
# without a comment) are dropped below.
comments <- lapply(cells, getCellComment)

read_comments <- comments %>%
  purrr::discard(is.null)

# Extract the text of each remaining comment. lapply() builds the result in
# one pass instead of growing a list inside a for loop; unname() keeps the
# result an unnamed list, exactly as the index-based loop produced.
resp <- lapply(unname(read_comments), function(cmt) cmt$getString())

resp

Answer 2

我认为现有的R包都无法解决这个问题，您最好采用VBA方案，例如下面这段把批注文本写入对应单元格的宏：

Sub CommentToCell()
    ' Copy the text of every comment on the active sheet into the cell the
    ' comment belongs to. NOTE: this overwrites the existing cell value.
    ' Declared explicitly so the macro also compiles under Option Explicit.
    Dim cmt As Comment
    ' loop through all the comments in the activesheet
    For Each cmt In ActiveSheet.Comments
        ' write the text of the comment into the cell; .Value is spelled out
        ' instead of relying on the default property of cmt.Parent
        cmt.Parent.Value = cmt.Text
    Next cmt
End Sub

更正：tidyxl程序包其实具有导入批注的功能，该功能记录在它的vignette中：

library(tidyxl)

# Import the workbook's cells; the `address` and `comment` columns of the
# result are used below.
path <- "path/of/ur.file.xlsx"
x <- xlsx_cells(path)

# Show only the cells whose comment is not missing.
has_comment <- !is.na(x$comment)
x[has_comment, c("address", "comment")]

# A tibble: 9 x 2
  address comment                                          
  <chr>   <chr>                                            
1 P4      "Johnston, Melissa - ARS:\r\nTest Note"          
2 N6      "Johnston, Melissa - ARS:\r\nAnother stupid note"
3 O9      "Johnston, Melissa - ARS:\r\nsilly steer note"   
4 R11     "Mary Ashby:\r\nNo wt. - in 15SW"                
5 P4      "Johnston, Melissa - ARS:\r\nTest Note"          
6 N6      "Johnston, Melissa - ARS:\r\nAnother stupid note"
7 R11     "Mary Ashby:\r\nNo wt. - in 15SW"                
8 P4      "Johnston, Melissa - ARS:\r\nTest Note"          
9 R11     "Mary Ashby:\r\nNo wt. - in 15SW"

首先从单元格地址中提取行号，然后用row_number给Excel数据加上行号列，再通过left_join按行号把批注连接到数据上：

# dplyr supplies %>%, mutate(), left_join(), row_number() and select();
# stringr supplies str_extract().
library(dplyr)
library(stringr)

# Reduce each commented cell's address (e.g. "P4") to its numeric row part.
# NOTE(review): `x` holds the cells returned for the whole workbook, so
# comments from every sheet end up in `cmts` -- confirm that is intended.
cmts <- x[!is.na(x$comment), c("address", "comment")] %>%
  mutate(address = as.numeric(str_extract(address, "\\d+")))

# Number the data rows and attach the comments by row number. The join key is
# spelled out so the join does not depend on which column names the two
# tables happen to share.
# NOTE(review): read_excel() presumably consumes a header row, in which case
# data row i sits on sheet row i + 1 -- verify the row alignment.
ur.df <- readxl::read_excel(path) %>%
  mutate(address = row_number()) %>%
  left_join(cmts, by = "address")

# just to see the result with the naked eye
ur.df %>% select(comment, everything())

# A tibble: 16 x 21
   comment Past  Section Treatment    ID `Tag Color` Owner notes
   <chr>   <chr> <chr>   <chr>     <dbl> <chr>       <chr> <lgl>
 1  NA     15E   E       moderate      7 Black       John  NA   
 2  NA     15E   E       moderate      9 Black       John  NA   
 3  NA     15E   E       moderate     10 Black       John  NA   
 4 "Johns… 15E   E       moderate     20 Black       John  NA   
 5 "Johns… 15E   E       moderate     20 Black       John  NA   
 6 "Johns… 15E   E       moderate     20 Black       John  NA   
 7  NA     23E   E       heavy        33 Black       Bob   NA   
 8 "Johns… 23E   E       heavy        36 Black       Bob   NA   
 9 "Johns… 23E   E       heavy        36 Black       Bob   NA   
10  NA     23E   E       heavy        39 Black       Bob   NA   
11  NA     23E   E       heavy        49 Black       Bob   NA   
12 "Johns… 23W   W       light        57 Black       Cher… NA   
13  NA     23W   W       light        58 Black       Cher… NA   
14 "Mary … 23W   W       light        61 Black       Cher… NA   
15 "Mary … 23W   W       light        61 Black       Cher… NA   
16 "Mary … 23W   W       light        61 Black       Cher… NA

编辑：加载所有工作表并获取其中的批注：

library(readxl)
library(purrr)
# dplyr supplies %>%, mutate(), left_join(), select() and filter() (without
# it, filter() would resolve to stats::filter); stringr supplies str_extract().
library(dplyr)
library(stringr)

# creating address from sheet name and row number
cmts <- x[!is.na(x$comment), c("sheet", "address", "comment")] %>%
  mutate(
    address = paste0(sheet, "-", str_extract(address, "\\d+")),
    sheet = NULL
  )

# loading the data and adding an address column that contains the sheet name
# and row number in the format `sheet-rownum`
# NOTE(review): read_excel() presumably consumes a header row, in which case
# data row i sits on sheet row i + 1 -- verify the row alignment.
ur.df <- excel_sheets(path) %>%
  map_dfr(function(sheet_name) {
    read_excel(path, sheet = sheet_name) %>%
      mutate(address = paste0(sheet_name, "-", row_number()))
  })

# joining the comments; the key is named explicitly so the join does not
# depend on which column names the two tables happen to share
ur.df <- ur.df %>% left_join(cmts, by = "address")
ur.df %>%
  select(comment, address) %>%
  filter(!is.na(comment))

# A tibble: 9 x 2
  comment                                           address
  <chr>                                             <chr>  
1 "Johnston, Melissa - ARS:\r\nTest Note"           2003-4 
2 "Johnston, Melissa - ARS:\r\nAnother stupid note" 2003-6 
3 "Johnston, Melissa - ARS:\r\nsilly steer note"    2003-9 
4 "Mary Ashby:\r\nNo wt. - in 15SW"                 2003-11
5 "Johnston, Melissa - ARS:\r\nTest Note"           2002-4 
6 "Johnston, Melissa - ARS:\r\nAnother stupid note" 2002-6 
7 "Mary Ashby:\r\nNo wt. - in 15SW"                 2002-11
8 "Johnston, Melissa - ARS:\r\nTest Note"           2001-4 
9 "Mary Ashby:\r\nNo wt. - in 15SW"                 2001-11

读取Excel单元格的批注（Notes/Comments）并写入R数据框的列

2 个答案: