我有一个大型数据框的子集,它包含 2 列,如下所示:
c1 c2
gym1 Thu:8:00 AM -10:30 PM;Fri: 8:00 AM -9:00 PM
gym2 Wed:7:00 AM-4:00 PM
gym3 Mon:12:00 PM - 6:00 PM;Tue:12:00 PM - 7:00 PM;Wed:10:00 AM -10:00 PM
gym4 Sat:8:00 AM -10:30 PM;Sun: 8:00 AM -9:00 PM
gym5 Sat:8:00 AM -10:30 PM;Sun: 8:00 AM -12:00 PM
c1 包含健身房名称,c2 以字符串形式包含星期几以及对应的营业时间。
我该如何解析 c2,以便:1. 找出哪些健身房的营业时间超过 x 小时;2. 找出哪些健身房在上午 9 点之后才开门?我猜可以在数据框的末尾添加两列,取值为 TRUE 或 FALSE,但我不知道该如何实现。
非常感谢任何帮助或指导。谢谢。
答案 0 :(得分:0)
以下示例非常冗长,当然可以简化。但是,我相信它符合您的要求。
它假定您的日期时间字符串都符合下面示例数据中的写法(其中空格等格式并不一致)。
它返回一个更大的数据框,然后提供:
每天的开放时间
每天关闭时间
每天健身房开放的持续时间
如果健身房在特定日期开放超过x小时
最后,它判断健身房是否在任意一天于上午 9 点或之后才开门
最后它确定健身房在任何一天的开放时间是否大于x小时
# Example data: one row per gym; c2 holds the opening hours as free text in
# the form "<Day>:<open> - <close>", with several days separated by ";".
df <- data.frame(
  c1 = c("gym1", "gym2", "gym3", "gym4", "gym5"),
  c2 = c(
    "Thu:8:00 AM -10:30 PM;Fri: 8:00 AM -9:00 PM",
    "Wed:7:00 AM-4:00 PM",
    "Mon:12:00 PM - 6:00 PM;Tue:12:00 PM - 7:00 PM;Wed:10:00 AM -10:00 PM",
    "Sat:8:00 AM -10:30 PM;Sun: 8:00 AM -9:00 PM",
    "Sat:8:00 AM -10:30 PM;Sun: 8:00 AM -12:00 PM"
  ),
  stringsAsFactors = FALSE
)

# Normalise c2 so every entry has the fixed-width shape "ddd:hh:mmxx-hh:mmxx":
# drop all spaces, zero-pad single-digit hours, and lower-case the text.
df$c2 <- gsub(" +", "", df$c2)
df$c2 <- gsub(":([1-9]):", ":0\\1:", df$c2)
df$c2 <- gsub("-([1-9]):", "-0\\1:", df$c2)
df$c2 <- tolower(df$c2)

# Threshold (in hours) for the "<day>_open_greater_than_x_hours" flags.
# The comparison below is inclusive: a day counts when duration >= x.
x <- 7

# Times are parsed in UTC so that a daylight-saving change in the local time
# zone on the day the script runs cannot distort the computed durations.
time_format <- "%I:%M%p"
nine_am <- strptime("09:00am", time_format, tz = "UTC")

# For every weekday add five columns: <day>_open, <day>_closed,
# <day>_duration (hours), <day>_open_greater_than_x_hours and
# <day>_open_after_9am. Rows where the day is absent get NA
# (FALSE for the greater-than-x flag).
for (day in c("mon", "tue", "wed", "thu", "fri", "sat", "sun")) {
  # Position of the last character of the day name, NA when not present.
  hit <- regexpr(day, df$c2, fixed = TRUE)
  hit_start <- as.integer(hit)
  day_end <- hit_start + attr(hit, "match.length") - 1L
  day_end[hit_start < 0L] <- NA_integer_

  # Thanks to the normalisation above the times sit at fixed offsets:
  # "ddd:" is followed by 7 characters of opening time, "-", then 7
  # characters of closing time. An NA position yields an NA string.
  opens <- substring(df$c2, day_end + 2L, day_end + 8L)
  closes <- substring(df$c2, day_end + 10L, day_end + 16L)

  open_at <- strptime(opens, time_format, tz = "UTC")
  close_at <- strptime(closes, time_format, tz = "UTC")
  hours_open <- as.numeric(difftime(close_at, open_at, units = "hours"))

  df[[paste0(day, "_open")]] <- opens
  df[[paste0(day, "_closed")]] <- closes
  df[[paste0(day, "_duration")]] <- hours_open
  df[[paste0(day, "_open_greater_than_x_hours")]] <-
    !is.na(hours_open) & hours_open >= x
  # Opens at or after 9am on this day; NA when the gym is closed that day.
  df[[paste0(day, "_open_after_9am")]] <- open_at >= nine_am
}

# Does the gym open at (or after) 9am on at least one day?
# "> 0" (not "> 1"): a single qualifying day is enough.
after_9_cols <- grepl("_open_after_9am$", names(df))
df$any_day_open_after_9am <-
  rowSums(df[, after_9_cols, drop = FALSE], na.rm = TRUE) > 0

# Is the gym open for at least x hours on at least one day?
long_day_cols <- grepl("_open_greater_than_x_hours$", names(df))
df$open_greater_than_x_hours <-
  rowSums(df[, long_day_cols, drop = FALSE], na.rm = TRUE) > 0
答案 1 :(得分:0)
第一步是将数据转换为更整洁的格式。
/// Application delegate that loads the Core Data stack and then installs the
/// root view controller with the container's view context.
@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {
    /// Set from the `createContainer` completion, i.e. only after the
    /// persistent stores have been loaded.
    var persistentContainer: NSPersistentContainer!
    var window: UIWindow?

    /// Starts loading the persistent container and returns `true` right away.
    /// The root view controller is replaced later, from the completion block
    /// that `createContainer` dispatches onto the main queue.
    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
        createContainer { container in
            self.persistentContainer = container
            let storyboard = self.window?.rootViewController?.storyboard
            guard let vc = storyboard?.instantiateViewController(withIdentifier: "RootViewController") as? RootViewController
                else { fatalError("Cannot instantiate root view controller") }
            vc.managedObjectContext = container.viewContext
            self.window?.rootViewController = vc
        }
        return true
    }

    /// Creates the "MyDataModel" container, loads its persistent stores and
    /// calls `completion` on the main queue with the container.
    /// Terminates the process if a store fails to load.
    func createContainer(completion: @escaping (NSPersistentContainer) -> ()) {
        let container = NSPersistentContainer(name: "MyDataModel")
        container.loadPersistentStores { _, error in
            // Unwrap before interpolating so the message shows the error
            // itself rather than "Optional(...)".
            if let error = error {
                fatalError("Failed to load store: \(error)")
            }
            DispatchQueue.main.async { completion(container) }
        }
    }
}
先用 `strsplit` 把各天拆分开,得到一个由字符向量组成的列表列。
然后用 `unnest` 展开,使每一天各占一行。
然后再做一些清理。
数据整洁之后,剩下的汇总就很简单:
library("lubridate")
library("tidyverse")

# Build the example data directly. The schedule strings contain spaces, so
# parsing them with a whitespace/fixed-width table reader is unreliable
# (the originally posted output had lost the closing time of gym3's Wed entry).
df <- tribble(
  ~c1,    ~c2,
  "gym1", "Thu:8:00 AM -10:30 PM;Fri: 8:00 AM -9:00 PM",
  "gym2", "Wed:7:00 AM-4:00 PM",
  "gym3", "Mon:12:00 PM - 6:00 PM;Tue:12:00 PM - 7:00 PM;Wed:10:00 AM -10:00 PM",
  "gym4", "Sat:8:00 AM -10:30 PM;Sun: 8:00 AM -9:00 PM",
  "gym5", "Sat:8:00 AM -10:30 PM;Sun: 8:00 AM -12:00 PM"
)

# Reshape to one row per gym and day with parsed opening/closing times.
df_tidied <-
  df %>%
  # One list element per gym, holding one string per day ...
  mutate(c2 = strsplit(c2, ";")) %>%
  # ... expanded to one row per gym/day.
  unnest(c2) %>%
  # Split on the first ":" only; the remaining colons belong to the times.
  separate(c2, c("open_day", "times"), sep = ":", extra = "merge") %>%
  mutate(times = gsub(" ", "", times)) %>%
  separate(times, c("open_time", "close_time"), sep = "-") %>%
  mutate(
    open_time = parse_date_time(open_time, "%I:%M%p"),
    # Fallback kept from the original answer: if the closing time cannot be
    # parsed, assume the gym closes one hour after opening.
    # NOTE(review): this invents a duration for malformed rows - confirm it
    # is wanted.
    close_time = coalesce(
      parse_date_time(close_time, "%I:%M%p"),
      open_time + hours(1)
    ),
    # Pin the unit so the column is always in hours, whatever the smallest
    # difference happens to be.
    opening_hours = difftime(close_time, open_time, units = "hours")
  )
df_tidied
#> # A tibble: 10 x 5
#> c1 open_day open_time close_time opening_hours
#> <chr> <chr> <dttm> <dttm> <time>
#> 1 gym1 Thu 0000-01-01 08:00:00 0000-01-01 22:30:00 14.5
#> 2 gym1 Fri 0000-01-01 08:00:00 0000-01-01 21:00:00 13
#> 3 gym2 Wed 0000-01-01 07:00:00 0000-01-01 16:00:00 9
#> 4 gym3 Mon 0000-01-01 12:00:00 0000-01-01 18:00:00 6
#> 5 gym3 Tue 0000-01-01 12:00:00 0000-01-01 19:00:00 7
#> 6 gym3 Wed 0000-01-01 10:00:00 0000-01-01 22:00:00 12
#> 7 gym4 Sat 0000-01-01 08:00:00 0000-01-01 22:30:00 14.5
#> 8 gym4 Sun 0000-01-01 08:00:00 0000-01-01 21:00:00 13
#> 9 gym5 Sat 0000-01-01 08:00:00 0000-01-01 22:30:00 14.5
#> 10 gym5 Sun 0000-01-01 08:00:00 0000-01-01 12:00:00 4
最后,只需按 `c1` 对 `opening_hours` 进行 `tally`(汇总)即可。
答案 2 :(得分:0)
# Split each gym's schedule into one row per day and compute how long the gym
# is open on that day. `dat` is the input data frame with columns c1 and c2;
# it is not defined in this snippet.
df <- dat %>%
  # One row per ";"-separated day entry. The printed result shows lower-cased
  # text, which the `[a-z]` look-behind in the next step relies on.
  tidytext::unnest_tokens(word, c2, token = strsplit, split = ";") %>%
  # Split on the ":" that directly follows the day name, and on "-".
  separate(word, c("day", "open_time", "close_time"), "(?<=[a-z]):|-") %>%
  mutate(
    # Fixed unit and time zone: the column is always in hours and cannot be
    # shifted by a local daylight-saving change.
    duration = difftime(
      strptime(close_time, "%I:%M %p", tz = "UTC"),
      strptime(open_time, "%I:%M %p", tz = "UTC"),
      units = "hours"
    )
  )
df
c1 day open_time close_time duration
1 gym1 thu 8:00 am 10:30 pm 14.5 hours
2 gym1 fri 8:00 am 9:00 pm 13.0 hours
3 gym2 wed 7:00 am 4:00 pm 9.0 hours
4 gym3 mon 12:00 pm 6:00 pm 6.0 hours
5 gym3 tue 12:00 pm 7:00 pm 7.0 hours
6 gym3 wed 10:00 am 10:00 pm 12.0 hours
7 gym4 sat 8:00 am 10:30 pm 14.5 hours
8 gym4 sun 8:00 am 9:00 pm 13.0 hours
# Per gym: flag the days on which it opens after 9:00 am and attach the total
# number of opening hours across all listed days.
df %>%
  group_by(c1) %>%
  mutate(
    # Compare on the 24-hour clock including minutes ("%H%M" gives e.g. 0930,
    # 1300). The 12-hour "%I" hour alone would rank 1:00 pm below 9 am and
    # would miss openings such as 9:30 am.
    openafter9 = as.numeric(
      format(strptime(open_time, "%I:%M %p"), "%H%M")
    ) > 900,
    Tot_Hrs_opn = sum(duration)
  )
# You can decide to use summarize instead, but remember that the opening hour
# may depend on the day, so you need to be careful.
# A tibble: 8 x 7
# Groups: c1 [4]
c1 day open_time close_time duration openafter9 Tot_Hrs_opn
<chr> <chr> <chr> <chr> <time> <lgl> <time>
1 gym1 thu 8:00 am 10:30 pm 14.5 hours FALSE 27.5 hours
2 gym1 fri 8:00 am 9:00 pm 13.0 hours FALSE 27.5 hours
3 gym2 wed 7:00 am 4:00 pm 9.0 hours FALSE 9.0 hours
4 gym3 mon 12:00 pm 6:00 pm 6.0 hours TRUE 25.0 hours
5 gym3 tue 12:00 pm 7:00 pm 7.0 hours TRUE 25.0 hours
6 gym3 wed 10:00 am 10:00 pm 12.0 hours TRUE 25.0 hours
7 gym4 sat 8:00 am 10:30 pm 14.5 hours FALSE 27.5 hours
8 gym4 sun 8:00 am 9:00 pm 13.0 hours FALSE 27.5 hours