I am given a character vector:
tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
and I want to convert it to be in cm.
Could you please advise how I can do this?
答案 0 :(得分:2)
There are a couple of methods you can use:
1) Read with fread
after pasting into a single string
library(data.table)
# Strip the inch mark, join the heights into one newline-separated string and
# let fread() parse it with "'" as the field separator (feet | inches).
parsed <- fread(paste(sub('"', "", df1$H), collapse = "\n"), sep = "'")
# Multiply the two columns by their cm factors and keep the single result column.
(as.matrix(parsed) %*% c(30.48, 2.54))[, 1]
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
2) Using gsubfn
library(gsubfn)
# Replace each "feet'inches" match with its value in centimetres.
# `(\\d+)` rather than `(\\d)` so that a multi-digit feet value is captured
# whole; the formula receives the two capture groups as x (feet) and y (inches).
as.numeric(gsubfn("(\\d+)'(\\d+)",
  ~ as.numeric(x) * 30.48 + as.numeric(y) * 2.54,
  sub('"', "", df1$H)))
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
3) with separate
library(tidyverse)
df1 %>%
  # Split on the non-alphanumeric marks. The trailing inch mark leaves an empty
  # third piece; extra = "drop" discards it without a warning on every row.
  separate(H, into = c("H1", "H2"), convert = TRUE, extra = "drop") %>%
  # 1 ft = 30.48 cm, 1 in = 2.54 cm
  transmute(H = H1 * 30.48 + H2 * 2.54)
# A tibble: 6 x 1
# H
# <dbl>
#1 188.
#2 178.
#3 165.
#4 155.
#5 165.
#6 163.
4) with measurements
library(measurements)
library(tidyverse)
df1 %>%
  # Split on the non-alphanumeric marks. The trailing inch mark leaves an empty
  # third piece; extra = "drop" discards it without a warning on every row.
  separate(H, into = c("H1", "H2"), convert = TRUE, extra = "drop") %>%
  # Let conv_unit() do the feet -> cm and inch -> cm conversions.
  transmute(H = conv_unit(H1, "ft", "cm") + conv_unit(H2, "inch", "cm"))
答案 1 :(得分:2)
One option is to extract all the numbers, convert them into a matrix, and then perform the calculation.
# Pull every run of digits out of each height; simplify = TRUE returns a
# character matrix with one row per height.
mat <- stringr::str_extract_all(df$H, "\\d+", simplify = TRUE)
feet <- as.numeric(mat[, 1])
inches <- as.numeric(mat[, 2])
feet * 30.48 + inches * 2.54
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
where `mat` is
# [,1] [,2]
#[1,] "6" "2"
#[2,] "5" "10"
#[3,] "5" "5"
#[4,] "5" "1"
#[5,] "5" "5"
#[6,] "5" "4"
First column is feet and second column inches.
And just for my own curiosity I wanted to solve this in base R
# Rewrite each height as "feet-inches", split on "-", then convert each pair to
# cm. vapply() guarantees a numeric vector (even for empty input), whereas
# sapply() would return an empty list in that case.
vapply(
  strsplit(sub("(\\d+)'(\\d+).*", "\\1-\\2", df$H), "-"),
  function(x) as.numeric(x[1]) * 30.48 + as.numeric(x[2]) * 2.54,
  numeric(1)
)
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
This follows similar logic: it extracts the two numbers from each string using `sub`, splits them using `strsplit`, and then converts each pair to numeric and performs the calculation.
答案 2 :(得分:2)
Using the `stringi` package to extract the relevant units:
library(stringi)
Raw <- c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\"")
## Extract Feet units by regex searching for 1 or more digits followed by a '
Feet <- stri_extract_first_regex(Raw, "[[:digit:]]+(?=')")
## Extract Inch units by regex searching for 1 or 2 digits followed by a "
Inches <- stri_extract_first_regex(Raw, "[[:digit:]]{1,2}(?=\")")
## Combine Feet and Inches (the extracted values are strings, so convert first)
TotalInches <- 12 * as.numeric(Feet) + as.numeric(Inches)
## Convert to cm
CM <- 2.54 * TotalInches
print(CM)
# [1] 187.96 177.80 165.10 154.94 165.10 162.56
If you need to do this for multiple columns, it might make sense to define the steps as a function at the top of your script so you can call it more concisely and don't need to store intermediate results in the global environment.
The function version below also replaces `NA` matches with 0, so that valid measurements such as `1'` or `11"` return a valid result instead of `NA`.
#' Convert feet-and-inches height strings to centimetres
#'
#' @param x Character vector of heights such as `"5'10\""`, `"6'"` or `"11\""`.
#' @return Numeric vector, the same length as `x`, of heights in cm. A missing
#'   feet or inches component is counted as 0.
FtInToCm <- function(x){
  # Parse the argument `x`, not the global `Raw`, so the function works on any input.
  Feet <- as.numeric(stringi::stri_extract_first_regex(x, "[[:digit:]]+(?=')"))
  Inches <- as.numeric(stringi::stri_extract_first_regex(x, "[[:digit:]]{1,2}(?=\")"))
  # Treat an absent component as 0 so that "1'" and "11\"" still convert.
  Feet[is.na(Feet)] <- 0
  Inches[is.na(Inches)] <- 0
  # 12 inches per foot, 2.54 cm per inch
  2.54 * (12 * Feet + Inches)
}
FtInToCm(Raw)
#[1] 187.96 177.80 165.10 154.94 165.10 162.56
答案 3 :(得分:1)
> dat <- tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
> # The result column is named `cm` because the computed values are centimetres
> dat$cm <- gsub("[\"]", "",dat$H) %>%
strsplit(., "'") %>%
lapply(., function(x) {
x <- as.numeric(x);
# feet -> cm, plus inches -> feet -> cm
(x[1]*30.48) + (x[2]/12)*30.48
}) %>%
unlist
> dat
# A tibble: 6 x 2
H cm
<chr> <dbl>
1 "6'2\"" 188.
2 "5'10\"" 178.
3 "5'5\"" 165.
4 "5'1\"" 155.
5 "5'5\"" 165.
6 "5'4\"" 163.
You can even use `map_dbl` instead of `lapply` and `unlist`:
> dat$H %>%
  gsub("[\"]", "", .) %>%   # drop the inch mark
  strsplit("'") %>%         # c(feet, inches) per height
  map_dbl(function(parts) {
    ft_in <- as.numeric(parts)
    # feet -> cm, plus inches -> feet -> cm
    (ft_in[1]*30.48) + (ft_in[2]/12)*30.48
  })
[1] 187.96 177.80 165.10 154.94 165.10 162.56
答案 4 :(得分:1)
我添加了另一个答案,只是为了给您另一个选择,因为我已经在看到其他答案之前就已经写了它。
我先将字符串转换为数字,然后转换单位:
library(dplyr)
library(stringr)
df <- tibble(H = c("6'2\"", "5'10\"", "5'5\"", "5'1\"", "5'5\"", "5'4\""))
df %>%
  mutate(
    # split foot from inch, drop the unit mark, convert to numeric
    foot = H %>%
      str_extract("^\\d+'") %>%
      str_remove("[^\\d]") %>%
      as.numeric(),
    inch = H %>%
      str_extract("\\d+\"$") %>%
      str_remove("[^\\d]") %>%
      as.numeric(),
    # convert units: cm() turns inches into centimetres
    H_new = cm(foot * 12) + cm(inch)
  )
# A tibble: 6 x 4
H foot inch H_new
<chr> <dbl> <dbl> <dbl>
1 "6'2\"" 6 2 188.
2 "5'10\"" 5 10 178.
3 "5'5\"" 5 5 165.
4 "5'1\"" 5 1 155.
5 "5'5\"" 5 5 165.
6 "5'4\"" 5 4 163.