
时间:2019-06-20 15:32:33

标签: r tidyr


# Sample data for the question, bound to `df` (the name the answers use).
# Each film contributes consecutive pairs of rows in `Ratings`: the provider
# name first, followed by that provider's score. Not every film lists every
# provider. The class vector and closing parentheses complete the dput()
# output, which was cut off after "tbl".
df <- structure(list(Title = c("Isn't It Romantic", "Isn't It Romantic", 
"Isn't It Romantic", "Isn't It Romantic", "Isn't It Romantic", 
"Isn't It Romantic", "Gully Boy", "Gully Boy", "Gully Boy", "Gully Boy", 
"Gully Boy", "Gully Boy", "The Wandering Earth", "The Wandering Earth", 
"The Wandering Earth", "The Wandering Earth", "The Wandering Earth", 
"The Wandering Earth", "How to Train Your Dragon: The Hidden World", 
"How to Train Your Dragon: The Hidden World", "How to Train Your Dragon: The Hidden World", 
"How to Train Your Dragon: The Hidden World", "How to Train Your Dragon: The Hidden World", 
"How to Train Your Dragon: The Hidden World", "American Woman", 
"American Woman", "Us", "Us", "Us", "Us", "Us", "Us", "The Wolf's Call", 
"The Wolf's Call", "Avengers: Endgame", "Avengers: Endgame", 
"Avengers: Endgame", "Avengers: Endgame", "Avengers: Endgame", 
"Avengers: Endgame", "The Silence", "The Silence", "The Silence", 
"The Silence", "The Silence", "The Silence", "My Little Pony: Equestria Girls: Spring Breakdown", 
"My Little Pony: Equestria Girls: Spring Breakdown"), Ratings = c("Internet Movie Database", 
"5.9/10", "Rotten Tomatoes", "68%", "Metacritic", "60/100", "Internet Movie Database", 
"8.4/10", "Rotten Tomatoes", "100%", "Metacritic", "65/100", 
"Internet Movie Database", "6.4/10", "Rotten Tomatoes", "74%", 
"Metacritic", "62/100", "Internet Movie Database", "7.6/10", 
"Rotten Tomatoes", "91%", "Metacritic", "71/100", "Rotten Tomatoes", 
"57%", "Internet Movie Database", "7.1/10", "Rotten Tomatoes", 
"94%", "Metacritic", "81/100", "Internet Movie Database", "7.6/10", 
"Internet Movie Database", "8.7/10", "Rotten Tomatoes", "94%", 
"Metacritic", "78/100", "Internet Movie Database", "5.2/10", 
"Rotten Tomatoes", "23%", "Metacritic", "25/100", "Internet Movie Database", 
"7.7/10")), row.names = c(NA, -48L), class = c("tbl_df", "tbl", 
"data.frame"))

enter image description here





6 个答案:

答案 0 :(得分:2)


# Keep, for every title, the row that holds its Rotten Tomatoes score.
# The score sits on the row right after the "Rotten Tomatoes" label, so the
# label is located within each group (`Ratings`, not the ungrouped
# `df$Ratings`): its position differs between titles that list different
# providers. which() yields no index for titles without that provider, so
# those titles simply contribute no row.
df %>% group_by(Title) %>% 
  slice(which(Ratings == "Rotten Tomatoes") + 1L) %>%
  rename(rottentomatoes_rating = Ratings)


# A tibble: 2 x 6
# Groups:   Title [2]
  Title             Year  Rated     Released   Runtime rottentomatoes_rating
  <chr>             <chr> <chr>     <date>     <chr>   <chr>                
1 Gully Boy         2019  Not Rated 2019-02-14 153 min 100%                 
2 Isn't It Romantic 2019  PG-13     2019-02-13 89 min  68%     


答案 1 :(得分:2)

sumshyftw 的答案很好。


# Keep only the rows whose Ratings value matches the pattern "%", then
# relabel the column. NOTE(review): this presumes percent signs appear only
# in the Rotten Tomatoes scores - confirm against the full data.
dt <- dt[like(dt$Ratings, "%"), ]
dt <- setnames(dt, old = "Ratings", new = "rottentomatoes_rating")


# A tibble: 2 x 6
  Title             Year  Rated     Released   Runtime rottentomatoes_rating
  <chr>             <chr> <chr>     <date>     <chr>   <chr>                
1 Isn't It Romantic 2019  PG-13     2019-02-13 89 min  68%                  
2 Gully Boy         2019  Not Rated 2019-02-14 153 min 100%  

我之所以使用 %like% "%",是因为我假设完整数据的格式与您的示例一致(即只有 Rotten Tomatoes 的评分带有百分号)。

答案 2 :(得分:2)



# Pair every provider row with the score row that follows it, then turn the
# providers into columns. Requires rows to alternate provider / score
# strictly, hence an even row count.
stopifnot(nrow(dt) %% 2L == 0L)

# positions of the Rating companies (odd rows); which() keeps this valid for
# a zero-row table, where seq(1, 0, 2) would raise an error
ids <- which(seq_len(nrow(dt)) %% 2L == 1L)

# get rows of Rating companies
dt %>% slice(ids) %>%
  # combine with the rating values, i.e. the row right after each company
  cbind(dt %>% slice(ids + 1L) %>% select(RatingsValue = Ratings)) %>%
  # reshape dataset: one column per company
  spread(Ratings, RatingsValue)

#                Title Year     Rated   Released Runtime Internet Movie Database Metacritic Rotten Tomatoes
# 1         Gully Boy 2019 Not Rated 2019-02-14 153 min                  8.4/10     65/100            100%
# 2 Isn't It Romantic 2019     PG-13 2019-02-13  89 min                  5.9/10     60/100             68%

答案 3 :(得分:1)


# using data.table
dt <- as.data.table(df)

# Index holds whether the row is a Provider (e.g. Rotten Tomatoes) or a Value;
# rows are expected to alternate Provider / Value throughout the table
dt[, Index := rep(c("Provider", "Value"), .N %/% 2L)]
# Provider.Id numbers the Provider/Value pairs within each title so the two
# rows of a pair can be bound together
dt[, Provider.Id := rep(seq_len(.N %/% 2L), each = 2L), by = Title]

# segment out the Provider & Values into columns
out <- dcast(dt, Title + Provider.Id ~ Index, value.var = "Ratings")
# Drop only the helper id. Provider has to stay, because the next dcast
# spreads on it (removing Provider here made that call fail).
out[, Provider.Id := NULL]

# now convert to full wide format; providers missing for a title become NA
out_df <- as.data.frame(
  dcast(out, Title ~ Provider, value.var = "Value", fill = NA)
)

答案 4 :(得分:0)


# using data.table
dt <- as.data.table(df)

# Turns one film's Ratings vector into a one-row wide table with one column
# per provider.
# Relies on the odd positions holding the provider name and the even
# positions holding that provider's value; it only works if this holds.
# dcast converts from long to the required wide format.
splitRatings <- function(Ratings) {
  # e.g. Ratings = dt$Ratings[1:6]
  # seq_along() keeps the mask empty for empty input, unlike 1:N
  is_provider <- seq_along(Ratings) %% 2L == 1L
  split_dt <- data.table(DB = Ratings[is_provider],
                         Values = Ratings[!is_provider])
  out <- dcast(split_dt, . ~ DB, value.var = "Values")
  # dcast adds a placeholder "." column for the empty left-hand side
  out[, "." := NULL]
  out
}

# Apply the function per film. Films may list different providers, so the
# per-film tables need not share the same columns; each one is therefore kept
# in a list column and stacked with fill = TRUE, which leaves NA where a film
# lacks a provider.
per_film <- dt[, .(wide = list(splitRatings(Ratings))),
               by = .(Title, Year, Rated, Released, Runtime)]
dt2 <- cbind(per_film[, !"wide"], rbindlist(per_film$wide, fill = TRUE))

# convert back
out <- as.data.frame(dt2)

答案 5 :(得分:0)


# Rows whose Ratings text contains a digit are treated as scores, all other
# rows as provider names.
df %>%
  # copy Ratings into Value, blanking out the rows without a digit
  mutate(Value = replace(Ratings, !str_detect(Ratings, "\\d"), NA)) %>%
  # pull each score up onto the provider row directly above it
  fill(Value, .direction = "up") %>%
  # the score rows themselves are no longer needed
  filter(!str_detect(Ratings, "\\d")) %>%
  # one column per provider
  spread(key = Ratings, value = Value)