如何删除数字前面的斜杠

时间:2018-07-05 22:20:32

标签: r

我在处理一个txt文件时遇到了问题。我想删除数字前面的斜杠,该怎么做?我还想把逗号替换为小数点。文件内容复制如下。注意:第一行是表头。

Regions  X1  X2  X3  X4  X5  X6  X7    
Piemonte  /25,4 35.9  8.5  0.6  7.6  13.5  8.5    
Val_d_Aosta  /41,3 49.6  1.9  0.0  0.9  5.8  0.5    
Lombardia  /26,5 36.4  10.6  2.1  4.1  15.2  5.1    
Liguria  33.2  /44,0 5.3  0.6  3.6  12.4  0.9    
Trentino  /26,3 52.3  6.5  0.5  2.1  11.6  0.7    
Veneto  23.3  /41,6 6.3  9.7  3.2  14.8  1.1    
Friuli  25.3  /49,1 5.4  0.8  3.0  12.8  3.6    
Emilia_Romagna  /29,4 40.7  7.9  0.9  5.6  13.5  2.0    
Toscana  38.5  /37,2 5.0  2.6  3.1  12.2  1.4    
Umbria  32.0  /46,8 4.6  0.7  4.5  9.4  2.0    
Marche  26.5  /49,4 4.4  0.8  4.2  9.1  5.6    
Lazio 21.4  /35,5 6.2  1.8  4.4  23.0  7.7    
Abruzzo  33.7  /54,1 2.4  0.1  3.1  4.1  2.5    
Molise  24.0  /64,5 0.6  0.0  2.9  6.8  1.2    
Campania  /31,0 40.9  4.6  3.4  2.0  12.1  6.0    
Puglia  33.5  /46,3 3.8  0.2  2.4  11.2  2.6    
Basilicata /24,4 60.4  2.9  0.1  4.5  5.7  2.0    
Calabria  /28,5 54.3  3.6  0.3  1.8  8.5  3.0    
Sicilia  33.5  /48,9 4.8  0.3  2.9  7.9  1.7    
Sardegna  /32,1 54.6  3.6  0.1  2.3  5.5  1.8    
Italia  28.4  /40,3 7.2  1.4  4.2  14.1  4.4

2 个答案:

答案 0 :(得分:0)

我们可以使用base R中的lapply遍历所有数值列,并用sub/gsub删除"/"、将","替换为"."

# Clean every column except the first one (Regions): swap the decimal comma
# for a point, drop the slash, then parse the resulting text as numbers.
df1[-1] <- lapply(df1[-1], function(col) {
  with_point <- gsub(",", ".", col)
  no_slash <- sub("[/]", "", with_point)
  as.numeric(no_slash)
})
# Print the cleaned data frame.
df1
#          Regions   X1   X2   X3  X4  X5   X6  X7
#1        Piemonte 25.4 35.9  8.5 0.6 7.6 13.5 8.5
#2     Val_d_Aosta 41.3 49.6  1.9 0.0 0.9  5.8 0.5
#3       Lombardia 26.5 36.4 10.6 2.1 4.1 15.2 5.1
#4         Liguria 33.2 44.0  5.3 0.6 3.6 12.4 0.9
#5        Trentino 26.3 52.3  6.5 0.5 2.1 11.6 0.7
#6          Veneto 23.3 41.6  6.3 9.7 3.2 14.8 1.1
#7          Friuli 25.3 49.1  5.4 0.8 3.0 12.8 3.6
#8  Emilia_Romagna 29.4 40.7  7.9 0.9 5.6 13.5 2.0
#9         Toscana 38.5 37.2  5.0 2.6 3.1 12.2 1.4
#10         Umbria 32.0 46.8  4.6 0.7 4.5  9.4 2.0
#11         Marche 26.5 49.4  4.4 0.8 4.2  9.1 5.6
#12          Lazio 21.4 35.5  6.2 1.8 4.4 23.0 7.7
#13        Abruzzo 33.7 54.1  2.4 0.1 3.1  4.1 2.5
#14         Molise 24.0 64.5  0.6 0.0 2.9  6.8 1.2
#15       Campania 31.0 40.9  4.6 3.4 2.0 12.1 6.0
#16         Puglia 33.5 46.3  3.8 0.2 2.4 11.2 2.6
#17     Basilicata 24.4 60.4  2.9 0.1 4.5  5.7 2.0
#18       Calabria 28.5 54.3  3.6 0.3 1.8  8.5 3.0
#19        Sicilia 33.5 48.9  4.8 0.3 2.9  7.9 1.7
#20       Sardegna 32.1 54.6  3.6 0.1 2.3  5.5 1.8
#21         Italia 28.4 40.3  7.2 1.4 4.2 14.1 4.4

数据

# Sample data for the base R answer. X1 and X2 are kept as character because
# some entries carry a leading "/" and a decimal comma; X3-X7 are already
# numeric.
df1 <- data.frame(
  Regions = c(
    "Piemonte", "Val_d_Aosta", "Lombardia", "Liguria", "Trentino",
    "Veneto", "Friuli", "Emilia_Romagna", "Toscana", "Umbria", "Marche",
    "Lazio", "Abruzzo", "Molise", "Campania", "Puglia", "Basilicata",
    "Calabria", "Sicilia", "Sardegna", "Italia"
  ),
  X1 = c(
    "/25,4", "/41,3", "/26,5", "33.2", "/26,3", "23.3", "25.3",
    "/29,4", "38.5", "32.0", "26.5", "21.4", "33.7", "24.0",
    "/31,0", "33.5", "/24,4", "/28,5", "33.5", "/32,1", "28.4"
  ),
  X2 = c(
    "35.9", "49.6", "36.4", "/44,0", "52.3", "/41,6", "/49,1",
    "40.7", "/37,2", "/46,8", "/49,4", "/35,5", "/54,1", "/64,5",
    "40.9", "/46,3", "60.4", "54.3", "/48,9", "54.6", "/40,3"
  ),
  X3 = c(
    8.5, 1.9, 10.6, 5.3, 6.5, 6.3, 5.4, 7.9, 5, 4.6, 4.4,
    6.2, 2.4, 0.6, 4.6, 3.8, 2.9, 3.6, 4.8, 3.6, 7.2
  ),
  X4 = c(
    0.6, 0, 2.1, 0.6, 0.5, 9.7, 0.8, 0.9, 2.6, 0.7, 0.8,
    1.8, 0.1, 0, 3.4, 0.2, 0.1, 0.3, 0.3, 0.1, 1.4
  ),
  X5 = c(
    7.6, 0.9, 4.1, 3.6, 2.1, 3.2, 3, 5.6, 3.1, 4.5, 4.2,
    4.4, 3.1, 2.9, 2, 2.4, 4.5, 1.8, 2.9, 2.3, 4.2
  ),
  X6 = c(
    13.5, 5.8, 15.2, 12.4, 11.6, 14.8, 12.8, 13.5, 12.2, 9.4, 9.1,
    23, 4.1, 6.8, 12.1, 11.2, 5.7, 8.5, 7.9, 5.5, 14.1
  ),
  X7 = c(
    8.5, 0.5, 5.1, 0.9, 0.7, 1.1, 3.6, 2, 1.4, 2, 5.6,
    7.7, 2.5, 1.2, 6, 2.6, 2, 3, 1.7, 1.8, 4.4
  ),
  stringsAsFactors = FALSE
)

答案 1 :(得分:0)

一种tidyverse方法

# For every column whose name starts with "X": rewrite entries of the form
# "/<digits>,<digits>" as "<digits>.<digits>" (dropping the slash and turning
# the decimal comma into a point), then convert the column to numeric.
# Entries without that pattern are left untouched before conversion.
# across() replaces the superseded mutate_at(vars(...)) form (dplyr >= 1.0.0).
df %>%
  mutate(across(
    starts_with("X"),
    function(x) as.numeric(gsub("/(\\d+),(\\d+)", "\\1.\\2", x))
  ))
#          Regions   X1   X2   X3  X4  X5   X6  X7
#1        Piemonte 25.4 35.9  8.5 0.6 7.6 13.5 8.5
#2     Val_d_Aosta 41.3 49.6  1.9   0 0.9  5.8 0.5
#3       Lombardia 26.5 36.4 10.6 2.1 4.1 15.2 5.1
#4         Liguria 33.2 44.0  5.3 0.6 3.6 12.4 0.9
#5        Trentino 26.3 52.3  6.5 0.5 2.1 11.6 0.7
#6          Veneto 23.3 41.6  6.3 9.7 3.2 14.8 1.1
#7          Friuli 25.3 49.1  5.4 0.8   3 12.8 3.6
#8  Emilia_Romagna 29.4 40.7  7.9 0.9 5.6 13.5   2
#9         Toscana 38.5 37.2    5 2.6 3.1 12.2 1.4
#10         Umbria 32.0 46.8  4.6 0.7 4.5  9.4   2
#11         Marche 26.5 49.4  4.4 0.8 4.2  9.1 5.6
#12          Lazio 21.4 35.5  6.2 1.8 4.4   23 7.7
#13        Abruzzo 33.7 54.1  2.4 0.1 3.1  4.1 2.5
#14         Molise 24.0 64.5  0.6   0 2.9  6.8 1.2
#15       Campania 31.0 40.9  4.6 3.4   2 12.1   6
#16         Puglia 33.5 46.3  3.8 0.2 2.4 11.2 2.6
#17     Basilicata 24.4 60.4  2.9 0.1 4.5  5.7   2
#18       Calabria 28.5 54.3  3.6 0.3 1.8  8.5   3
#19        Sicilia 33.5 48.9  4.8 0.3 2.9  7.9 1.7
#20       Sardegna 32.1 54.6  3.6 0.1 2.3  5.5 1.8
#21         Italia 28.4 40.3  7.2 1.4 4.2 14.1 4.4

这会删除所有以X开头的列中的斜杠,把逗号替换为小数点,并将各项转换为numeric


样本数据

# Sample data for the tidyverse answer, parsed from inline text. The first
# line supplies the column names. `header = TRUE` is spelled out because the
# shorthand `T` is an ordinary variable that can be reassigned.
df <- read.table(text =
    "Regions  X1  X2  X3  X4  X5  X6  X7
Piemonte  /25,4 35.9  8.5  0.6  7.6  13.5  8.5
Val_d_Aosta  /41,3 49.6  1.9  0.0  0.9  5.8  0.5
Lombardia  /26,5 36.4  10.6  2.1  4.1  15.2  5.1
Liguria  33.2  /44,0 5.3  0.6  3.6  12.4  0.9
Trentino  /26,3 52.3  6.5  0.5  2.1  11.6  0.7
Veneto  23.3  /41,6 6.3  9.7  3.2  14.8  1.1
Friuli  25.3  /49,1 5.4  0.8  3.0  12.8  3.6
Emilia_Romagna  /29,4 40.7  7.9  0.9  5.6  13.5  2.0
Toscana  38.5  /37,2 5.0  2.6  3.1  12.2  1.4
Umbria  32.0  /46,8 4.6  0.7  4.5  9.4  2.0
Marche  26.5  /49,4 4.4  0.8  4.2  9.1  5.6
Lazio 21.4  /35,5 6.2  1.8  4.4  23.0  7.7
Abruzzo  33.7  /54,1 2.4  0.1  3.1  4.1  2.5
Molise  24.0  /64,5 0.6  0.0  2.9  6.8  1.2
Campania  /31,0 40.9  4.6  3.4  2.0  12.1  6.0
Puglia  33.5  /46,3 3.8  0.2  2.4  11.2  2.6
Basilicata /24,4 60.4  2.9  0.1  4.5  5.7  2.0
Calabria  /28,5 54.3  3.6  0.3  1.8  8.5  3.0
Sicilia  33.5  /48,9 4.8  0.3  2.9  7.9  1.7
Sardegna  /32,1 54.6  3.6  0.1  2.3  5.5  1.8
Italia  28.4  /40,3 7.2  1.4  4.2  14.1  4.4", header = TRUE)