如何从文件名中删除文件扩展名并将其分配给列名?

时间:2016-10-24 20:37:54

标签: r dplyr

我有一个数据框A,其列名是通用名称V1 - V24。另有19个列名存储在一个单独的数据框B中,需要把它们分配给A。我想:

  1. 从数据框B中的列名列表中删除.tif。
  2. 前5个列名由我手动指定,其余列名应按给定顺序从B中依次读取。基本上,V6 = bio_1,V7 = bio_10,依此类推。
  3. 代码:

    library(dplyr)
    # Read the list of raster file names; the single column is named V1.
    B <- read.table('filenames.txt')
    # V1 may come back as a factor, so convert it to plain character strings.
    B$V1 <- as.character(B$V1)
    
    # NOTE: strsplit() returns a list with one element per row of V1, but
    # `[[1]][1]` keeps only the first piece of the FIRST row. mutate() then
    # recycles that single string, so every row of col2 gets the same value.
    B <- B %>%
      mutate(col2 = strsplit(V1, "\\.")[[1]][1])
    

    此外,

    # Read the data without a header row, so the columns get the default
    # names V1, V2, ...
    A <- read.csv('futuredata.csv',header=F)
    
    # Rename the first five columns by hand.
    # NOTE: the trailing `%>%` leaves this expression incomplete; the step that
    # should name the remaining columns (V6 onward) from B is still missing.
    A <- A %>%
      rename(ID = V1, gauge = V2, lat = V3, lon = V4, area = V5) %>%
    

    数据:

    dput(A)

    structure(list(V1 = 1:3, V2 = c(1094000L, 1100600L, 1096000L), 
        V3 = c(-71.506667, -71.215278, -71.658333), V4 = c(42.8575, 
        42.568056, 42.634167), V5 = c(442.888, 94.5346, 170.6802), 
        V6 = c(73.805907, 91.644231, 80.292398), V7 = c(190.198312, 
        205.201923, 196.80117), V8 = c(-50.635021, -26.259615, -43.94152
        ), V9 = c(1136.151899, 1118.826923, 1112.157895), V10 = c(113.563291, 
        114.557692, 112.538012), V11 = c(84.995781, 84.105769, 84.327485
        ), V12 = c(7.845992, 8.846154, 8), V13 = c(311.603376, 308.817308, 
        307.076023), V14 = c(264.763713, 255.980769, 262.22807), 
        V15 = c(281.383966, 255.980769, 269.672515), V16 = c(274.626582, 
        281.442308, 271.836257), V17 = c(121.006329, 115.336538, 
        122.830409), V18 = c(30.763713, 30.826923, 31), V19 = c(9281.014768, 
        8928.057692, 9269.432749), V20 = c(267.883966, 280.5, 274.900585
        ), V21 = c(-119.333333, -90.048077, -113.754386), V22 = c(387.2173, 
        370.548077, 388.654971), V23 = c(28.236287, -0.096154, 34.888889
        ), V24 = c(-37.721519, 205.201923, -13.134503)), .Names = c("V1", 
    "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", 
    "V12", "V13", "V14", "V15", "V16", "V17", "V18", "V19", "V20", 
    "V21", "V22", "V23", "V24"), row.names = c(NA, 3L), class = "data.frame")
    

    dput(B)

    structure(list(V1 = structure(1:19, .Label = c("bio_1.tif", "bio_10.tif", 
    "bio_11.tif", "bio_12.tif", "bio_13.tif", "bio_14.tif", "bio_15.tif", 
    "bio_16.tif", "bio_17.tif", "bio_18.tif", "bio_19.tif", "bio_2.tif", 
    "bio_3.tif", "bio_4.tif", "bio_5.tif", "bio_6.tif", "bio_7.tif", 
    "bio_8.tif", "bio_9.tif"), class = "factor")), .Names = "V1", class = "data.frame", row.names = c(NA, 
    -19L))
    

    问题:

    1. 删除.tif之后,我得不到正确的名称列表。
    2. 不确定如何将它们分配给数据集A。

3 个答案:

答案 0 :(得分:1)

也许试试这个:

# Rename every column of A in one step: the five hand-picked names first,
# followed by the file names stored in B with their extension stripped.
# rename_() and its `.dots` argument are deprecated in dplyr, so the
# supported rename_with() (dplyr >= 1.0.0) is used instead.
new_names <- c(
  "ID", "gauge", "lat", "lon", "area",
  # as.character() makes the factor-to-string conversion of B$V1 explicit.
  tools::file_path_sans_ext(as.character(B$V1))
)
# Fail early with a clear message if B does not supply one name per column.
stopifnot(length(new_names) == ncol(A))

A <- A %>%
  rename_with(~ new_names)

这是dplyr方式(注:下面的代码块并非R代码,而是与本问题无关的Firebase安全规则,疑为页面抓取时混入):

"queue": {
  "tasks": {
    ".indexOn": [
      "_state"
    ],
    "$id": {
      ".read": "auth !== null && ((!data.exists()) || (data.child('user').val() === auth.uid))",
      ".write": "auth !== null && ((!data.exists() && newData.child('user').val() === auth.uid && newData.child('_state').val() === '<start state goes here>') || (data.exists() && data.child('user').val() === auth.uid && !newData.exists()))"
    }
  }
}

答案 1 :(得分:0)

一个简单的基础R解决方案:

# Base R: return a copy of A whose columns carry the five hand-picked labels
# followed by the file names from B without their ".tif" extension.
setNames(
  A,
  c(
    c("ID", "gauge", "lat", "lon", "area"),
    # Anchor the pattern with `$` so only a trailing ".tif" is removed, and
    # read B$V1 directly: unlist(B) would also flatten any extra columns of B
    # (such as a helper column added earlier) and yield too many names.
    sub("\\.tif$", "", as.character(B$V1))
  )
)

  ID   gauge       lat      lon     area    bio_1   bio_10    bio_11   bio_12
1  1 1094000 -71.50667 42.85750 442.8880 73.80591 190.1983 -50.63502 1136.152
2  2 1100600 -71.21528 42.56806  94.5346 91.64423 205.2019 -26.25962 1118.827
3  3 1096000 -71.65833 42.63417 170.6802 80.29240 196.8012 -43.94152 1112.158
    bio_13   bio_14   bio_15   bio_16   bio_17   bio_18   bio_19    bio_2
1 113.5633 84.99578 7.845992 311.6034 264.7637 281.3840 274.6266 121.0063
2 114.5577 84.10577 8.846154 308.8173 255.9808 255.9808 281.4423 115.3365
3 112.5380 84.32748 8.000000 307.0760 262.2281 269.6725 271.8363 122.8304
     bio_3    bio_4    bio_5      bio_6    bio_7     bio_8     bio_9
1 30.76371 9281.015 267.8840 -119.33333 387.2173 28.236287 -37.72152
2 30.82692 8928.058 280.5000  -90.04808 370.5481 -0.096154 205.20192
3 31.00000 9269.433 274.9006 -113.75439 388.6550 34.888889 -13.13450

答案 2 :(得分:0)

用最基础的方式(同时去掉B中的因子):

# Very basic approach: strip the extension from each file name in B and
# write the results over the generic names of A, starting at column 6.
Bvalue <- as.character(B$V1)  # drop the factor, keep plain strings
# file_path_sans_ext() removes the extension whatever its length, instead of
# assuming that it is always exactly four characters (".tif").
Bshort <- tools::file_path_sans_ext(Bvalue)
# The names must exactly fill the columns after the first five; checking this
# up front avoids silent recycling or a malformed names vector.
stopifnot(length(Bshort) == ncol(A) - 5L)
# Derive the target columns from the number of names rather than hard-coding
# the range 6:24.
names(A)[5L + seq_along(Bshort)] <- Bshort