我想编写一个函数:向它传入一个由日期时间列组成的数据框(例如下面的 dfTdata),并指定其中一个日期时间列(例如 TrWindDateTime),让它返回与该列平均差异最小的另一个日期时间列。这正好涉及我最不擅长的两件事:在 R 中处理日期时间列,以及编写循环。下面是我找到并尝试拼凑起来的一些代码。任何提示都将不胜感激。
Code:
## Taking difference between specified column and all other columns in data.frame. I know this code doesn't exactly do what I'm trying to accomplish here. What I would like to do is pass it dfTdata and specify a column name, and have it take the difference between that date column and all the others.
# Difference between a reference column and every other column.
#
# x:   data.frame whose columns all support `-` against the reference
#      column (numeric or date-time values).
# ref: name or position of the reference column; defaults to the first.
#
# Returns a data.frame with one column per non-reference column of `x`,
# holding `other - reference` row by row. An NA on either side gives NA.
funcDiff <- function(x, ref = 1L) {
  stopifnot(is.data.frame(x), length(ref) == 1L)
  ref_idx <- if (is.character(ref)) match(ref, names(x)) else as.integer(ref)
  if (is.na(ref_idx) || ref_idx < 1L || ref_idx > ncol(x)) {
    stop("`ref` must identify exactly one column of `x`", call. = FALSE)
  }
  ref_vals <- x[[ref_idx]]
  diffs <- x[-ref_idx]
  # Iterate over the remaining columns only, so no index can run past the
  # last column.
  for (i in seq_along(diffs)) {
    diffs[[i]] <- diffs[[i]] - ref_vals
  }
  # A `for` loop evaluates to NULL, so the result must be returned explicitly.
  diffs
}
## Data frame of differences between the columns of dfTdata
dfDateDiff <- funcDiff(dfTdata)
## Mean absolute difference for each column of differences. abs() stops early
## and late gaps from cancelling out; na.rm = TRUE skips rows with a missing
## timestamp instead of turning the whole mean into NA.
dfMeanDiff <- vapply(
  dfDateDiff,
  function(d) mean(abs(as.numeric(d)), na.rm = TRUE),
  numeric(1)
)
## Position (named by column) of the smallest mean difference. which.min()
## works on the numeric vector directly and ignores NaN from all-NA columns.
ColNum <- which.min(dfMeanDiff)
Data:
dput(dfTdata)
structure(list(TrWindDateTime = c(1422683580, 1422559320, 1423162920,
1423598220, 1423697880, 1423545300, 1424580900, 1424242200, 1426370640,
1426616460, 1426479960, 1426721700, 1427121780, 1428707940, 1430257500,
1432256100, 1433912820, 1435505100, 1437770880, 1440517860, 1440373200,
1440028200, 1446993480, 1449501900, 1449095520, 1449449340, 1450308780,
1449771840, 1438615980, 1439432400, 1440340980, 1440090360, 1440027300,
1439232120, 1440593340, 1438405920, 1442772900, 1443192720, 1443110040,
1443153600, 1441950840, 1443343800, 1443723720, 1445739720, 1443759360,
1444959300, 1444851960, 1445704920, 1443314280, 1443400980, 1443834480,
1445292540, 1447201440, 1447201560, 1447535460, 1447542600, 1448128440,
1446287220, 1445973000, 1448101560, 1447932900, 1446932220, 1447351680,
1447981980, 1447313160, 1447693140, 1447702980, 1447801140, 1446873960,
1447267260, 1446932700, 1448061660, 1447280580, 1447534680, 1446919980,
1447698180, 1447057440, 1447205880, 1447693440, 1447016700, 1448043660,
1447376220, 1447931340, 1449606480, 1448952120, 1448352120, 1448165040,
1450035060, 1449361860, 1450128060, 1449465540, 1449707520, 1449247140,
1449350520, 1449704160, 1449717180, 1449711360, 1449768540, 1449862440,
1449756300), WindDateTime = c(1422690780, 1422560640, 1423163280,
1423600200, 1423701000, 1423546920, 1424582880, 1424243460, 1426373160,
1426617000, 1426484820, 1426725600, 1427123580, 1428708420, 1430259240,
1432260660, 1433914800, 1435507800, NA, 1440519180, 1440373800,
1440028200, 1446994860, 1449503460, 1449098340, 1449449520, 1450310220,
1449772620, 1438616580, 1439432400, 1440341460, 1440090600, 1440028440,
1439232960, 1440594240, 1438406340, 1442774040, 1443193200, 1443110940,
1443154980, 1441952640, 1443353400, 1443724440, 1445740980, 1443761220,
1444962360, 1444853940, 1445705040, 1443316200, 1443402420, 1443835560,
1445293380, 1447202340, 1447202760, 1447535760, 1447543080, 1448130120,
1446287520, 1445974860, 1448101740, 1447945680, 1446934620, 1447353240,
1447984020, 1447313760, 1447693740, 1447705080, 1447801920, 1446874620,
1447271580, 1446933720, 1448066700, 1447283160, 1447534860, 1446920820,
1447699380, 1447058820, 1447207200, 1447694400, 1447017120, 1448044320,
1447376760, 1447931580, 1449607020, 1448952960, 1448354640, 1448167080,
1450036500, 1449363240, 1450128060, 1449466380, 1449707940, 1449247500,
1449357780, 1449704760, 1449718200, 1449712800, 1449769200, 1449864360,
1449758340), TomDateTime = c(1422688560, NA, 1423162980, 1423599540,
1423709640, NA, 1424580900, 1424242380, 1426380780, 1426617840,
1426486620, 1426722660, 1427122380, 1428709200, 1430257740, 1432275120,
1433921340, 1435508940, 1437769140, 1440517980, 1440373200, 1440027360,
1446993540, 1449503280, 1449097860, 1449449520, 1450310520, 1449773220,
1438616040, 1439431500, 1440342840, 1440091260, 1440027480, 1439232120,
1440593400, 1438407900, 1442781360, 1443193560, 1443110040, 1443153660,
1441951500, 1443352200, 1443724500, NA, 1443763380, 1444968780,
1444853580, 1445706240, 1443315420, 1443401340, 1443834540, 1445292540,
1447201440, 1447201620, 1447538340, 1447554360, 1448128800, 1446287220,
1445989380, 1448109540, 1447932900, 1446934260, 1447351800, 1447982520,
NA, 1447693500, 1447703040, 1447802640, 1446875880, 1447275360,
1446940440, 1448061720, 1447282320, 1447548840, 1446921780, 1447698240,
1447058280, 1447206540, 1447703700, 1447016760, 1448052060, 1447376220,
1447936920, 1449607500, 1448956380, 1448353920, 1448166120, 1450035900,
1449363540, 1450126920, 1449465540, 1449707580, 1449247500, 1449350520,
1449716460, 1449730920, 1449717120, 1449769200, 1449862740, 1449757320
), CircleDateTime = c(1422688740, 1422560280, NA, 1423599720, 1423707120,
1423546800, 1424591940, 1424242800, 1426380900, 1426617840, 1426484640,
1426724280, 1427122680, 1428708720, 1430257800, 1432260720, 1433921340,
1435509060, 1437770880, 1440518220, 1440373380, 1440030660, 1446993780,
1449503520, 1449097560, 1449449580, 1450310100, 1449772680, 1438616400,
1439431740, 1440343440, 1440091620, 1440027600, 1439232540, 1440594060,
1438408020, 1442781660, 1443195120, 1443110580, 1443153720, 1441952220,
1443353160, 1443725040, 1445740920, 1443763500, 1444965120, 1444853760,
1445706960, 1443315420, 1443401640, 1443835560, 1445292660, 1447201980,
1447201680, 1447538340, 1447542900, 1448128800, 1446287280, 1445991480,
1448110380, 1447932960, 1446934860, 1447353540, 1447982940, NA,
1447693320, 1447703220, 1447802820, 1446876000, 1447271820, 1446933480,
1448062020, 1447283100, 1447549020, 1446921300, 1447698300, 1447058280,
1447206840, 1447701300, 1447017240, 1448044320, 1447376340, 1447937700,
1449607380, 1448952840, 1448354280, 1448166960, 1450036800, 1449363300,
1450129380, 1449466020, 1449707820, 1449247680, 1449350640, 1449707100,
1449718440, 1449718500, 1449769920, 1449864600, 1449757560),
SteamDateTime = c(NA, 1422568800, 1423200600, 1423607880,
NA, NA, NA, NA, 1426610820, 1426628100, 1426489860, NA, 1427138220,
1428715200, NA, NA, 1433922660, 1435511100, NA, 1440545100,
1440387480, NA, 1447015500, 1449516420, 1449100500, 1449453600,
1450328820, 1449777000, 1438638480, NA, 1440351900, 1440105240,
1440053760, 1439255400, 1440629040, 1438411500, 1442808420,
1443210900, 1443148620, NA, NA, NA, NA, NA, NA, NA, NA, 1445724660,
NA, NA, 1443856860, 1445314620, 1447225440, NA, 1447566360,
NA, NA, 1446309660, NA, 1448127000, 1447964100, 1446969300,
1447365240, NA, NA, 1447728960, 1447729620, NA, NA, 1447300020,
1446963840, 1448076900, NA, 1447572600, 1446937020, 1447717200,
1447100280, NA, 1447734360, 1447051080, 1448075040, 1447388280,
1447956000, 1449641580, NA, 1448412600, NA, 1450056840, 1449381360,
NA, NA, 1449736800, 1449273600, 1449390900, 1449719340, NA,
NA, 1449787440, NA, 1449772080), AnyDateTime = c(NA, 1422561120,
1423200180, 1423607880, NA, NA, NA, NA, 1426404420, 1426628100,
1426489860, 1426741080, 1427138220, 1428715200, NA, NA, 1433922660,
1435511100, 1437781680, 1440545100, 1440387480, NA, 1447015500,
1449516420, 1449100500, 1449453600, 1450328820, 1449777000,
1438638480, NA, 1440351900, 1440105240, 1440053760, 1439255400,
1440629040, 1438411500, 1442808420, 1443210900, 1443148620,
NA, NA, NA, NA, NA, NA, NA, NA, 1445724660, NA, NA, 1443856860,
1445314620, 1447225440, NA, 1447566360, NA, NA, 1446309660,
NA, 1448127000, 1447964100, 1446969300, 1447365240, NA, NA,
1447728960, 1447729620, NA, NA, 1447300020, 1446963840, 1448076900,
NA, 1447572600, 1446937020, 1447717200, 1447100280, NA, 1447734360,
1447051080, 1448075040, 1447388280, 1447956000, 1449641580,
NA, 1448412600, NA, 1450056840, 1449381360, NA, NA, 1449736800,
1449273600, 1449390900, 1449719340, NA, NA, 1449787440, 1449894600,
1449772080)), .Names = c("TrWindDateTime", "WindDateTime",
"TomDateTime", "CircleDateTime", "SteamDateTime", "AnyDateTime"
), row.names = c(NA, 100L), class = "data.frame")
答案 0 :(得分:0)
这是你想要的吗?您把参考列的名称作为字符串传给它,并告诉它您希望返回什么。默认情况下,它返回与参考列的平均(绝对值)时间差最小的那一列。您也可以让它只返回该列的名称,或者返回该列的名称以及它与参考列之间的平均差异。
# Run this to convert to POSIXct
library('lubridate')
library('magrittr') # this is just for the first pipe
# Turn every epoch-seconds column into POSIXct, then rebuild the data.frame
dfTdata <- dfTdata %>%
  lapply(function(col) as.POSIXct(col, origin = '1970-01-01')) %>%
  as.data.frame()
# Find the column that is, on average, closest to a reference column.
#
# df_col:    name (string) of the reference column in `data`.
# data:      data.frame of date-time (or numeric) columns; defaults to the
#            global `dfTdata`.
# give_back: 'name' returns the closest column's name; 'info' returns a
#            one-element named list holding that column's mean absolute
#            difference; anything else (the default) returns the closest
#            column itself.
#
# Stops with an error if `df_col` is not a column of `data` or if no other
# column has a usable (non-NA) mean difference.
funcDiff <- function(df_col, data = dfTdata, give_back = '') {
  stopifnot(
    is.character(df_col), length(df_col) == 1L,
    df_col %in% names(data)
  )
  # Candidate columns come from `data` itself, not from the global `dfTdata`.
  othernames <- setdiff(names(data), df_col)
  ref_vals <- data[[df_col]]
  coldifs <- lapply(othernames, function(x) {
    other <- data[[x]]
    if (inherits(other, "POSIXt") && inherits(ref_vals, "POSIXt")) {
      # `-` on date-times picks secs/mins/hours/days separately for each
      # column, which makes the means incomparable; force one unit.
      gaps <- difftime(other, ref_vals, units = "secs")
    } else {
      gaps <- other - ref_vals
    }
    mean(abs(gaps), na.rm = TRUE)
  })
  names(coldifs) <- othernames
  mean_gaps <- vapply(coldifs, as.numeric, numeric(1))
  # which.min() yields a single winner (first on ties) and skips NaN means
  # produced by columns with no overlapping non-NA rows.
  best <- which.min(mean_gaps)
  if (length(best) == 0L) {
    stop("no column has a non-missing mean difference from `df_col`",
         call. = FALSE)
  }
  min_col <- names(best)
  if (give_back == 'name') {
    min_col
  } else if (give_back == 'info') {
    coldifs[min_col]
  } else {
    data[[min_col]]
  }
}
# Use like this
# Default give_back: returns the values of the column whose mean absolute
# difference from TrWindDateTime is smallest
funcDiff('TrWindDateTime')
# give_back = 'info': returns that column's entry from the named list of
# mean absolute differences
funcDiff('TrWindDateTime', give_back = 'info')
答案 1 :(得分:0)
为了给您提供另一个选项,下面是使用 data.table 包实现的版本。下面的函数返回平均差异最小的那一列的名称及其平均差异。您还可以指定差异所使用的时间单位。
library(data.table)
#------------------------------------------------------
# Turn into a data.table and convert to datetime class
#------------------------------------------------------
dfTdata <- as.data.table(dfTdata)
nms <- names(dfTdata)
# Convert every column from epoch seconds to POSIXct. The argument is spelled
# `.SDcols` in full rather than relying on partial argument matching.
dfTdata[
  ,
  (nms) := lapply(.SD, as.POSIXct, origin = '1970-01-01'),
  .SDcols = nms
]
#--------------------------------
# Function to calculate min time
#--------------------------------
# Find the column whose timestamps are, on average, closest to `var`.
#
# ds:   data.frame or data.table of date-time columns.
# var:  name (string) of the reference column in `ds`.
# unit: unit passed to difftime() for the reported difference; one of
#       "auto", "secs", "mins", "hours", "days", "weeks". "auto" is treated
#       as "secs", because difftime() would otherwise choose a unit per
#       column and the averages could not be compared.
#
# Returns a length-one named numeric vector: the name of the closest column
# and its mean absolute difference from `var` in `unit`. Rows where either
# timestamp is NA are ignored.
findMinAvgTime <- function(ds, var, unit = 'mins') {
  difunits <- c("auto", "secs", "mins", "hours", "days", "weeks")
  # stopifnot() needs every condition to be TRUE, so the checks are stated
  # positively: `ds` is tabular and `var` is one of its columns.
  stopifnot(
    is.data.frame(ds),
    is.character(var), length(var) == 1L, var %in% names(ds),
    is.character(unit), length(unit) == 1L, unit %in% difunits
  )
  if (unit == "auto") {
    unit <- "secs"
  }
  # Variables to subtract on
  othVras <- setdiff(names(ds), var)
  stopifnot(length(othVras) > 0L)
  refTimes <- ds[[var]]
  # Mean absolute difference per column. A signed mean would let early and
  # late gaps cancel, and min() would then favour the most negative average.
  avgGap <- vapply(
    othVras,
    function(nm) {
      gaps <- difftime(refTimes, ds[[nm]], units = unit)
      mean(abs(as.numeric(gaps)), na.rm = TRUE)
    },
    numeric(1)
  )
  # which.min() skips NaN (columns with no usable rows) and keeps the name.
  avgGap[which.min(avgGap)]
}
#--------------------------------
# Run Function
# Reference column is TrWindDateTime; `unit` is left at its default of 'mins'
#--------------------------------
findMinAvgTime(dfTdata, 'TrWindDateTime')