我使用Graphite(http://graphite.wikidot.com/)来记录各种服务的性能统计信息,这些统计信息可以通过API访问。每项服务都有若干不同的指标,每个指标又有若干不同的统计数据。为了遍历所有这些组合并获取我们想要的统计数据,我编写了如下所示的3层嵌套for循环来生成所需的URL。而情况还在变糟:我们刚刚为这个层次结构引入了另一个级别,因为每个服务可以有多个级别,因此它们都需要唯一的ID。在让代码变得更加混乱之前,我想确认一下:我确信一定有更简单的方法,但谷歌搜索没有找到任何东西。关于最佳做法,有什么想法吗?
# Remember where the script was started and make sure both output folders
# ("All Data" for the CSVs, "Charts" for the PNGs) exist beneath it.
dir.current <- getwd()
invisible(lapply(
  c("All Data", "Charts"),
  function(folder) {
    dir.create(file.path(dir.current, folder), showWarnings = FALSE)
  }
))

# Grab parameters: Graphite host, time window and test name.
graphite.ip <- "192.168.0.16:8080"
from <- list(hour = "18", min = "00", year = "2013", month = "09", day = "18")
until <- list(hour = "10", min = "50", year = "2013", month = "09", day = "19")
test.name <- "multinode"

# Turn the parameters above into the URL fragments shared by every request.
# Each variable is overwritten with its ready-to-concatenate string form;
# times are rendered as HH%3AMM_YYYYMMDD ("%3A" is a URL-encoded colon).
graphite.ip <- paste0("http://", graphite.ip, "/render?")
from <- with(
  from,
  paste0("from=", hour, "%3A", min, "_", year, month, day)
)
until <- with(
  until,
  paste0("&until=", hour, "%3A", min, "_", year, month, day)
)
test.name <- paste0("&target=", test.name)

# Reusable groups of statistic names.
stats.few <- c("count", "m1_rate", "m5_rate", "m15_rate", "mean_rate")
stats.many <- c("count", "m1_rate", "mean", "mean_rate", "p95", "stddev")
stats.memory <- "total.used"

# For every service: which metrics to fetch, and which statistics per metric.
engine.stats <- list(
  `event-timer` = stats.many,
  memory = stats.memory
)
journaler.stats <- list(
  `journaler-rate` = stats.few,
  memory = stats.memory
)
notification.stats <- list(
  `notification-rate` = stats.few,
  memory = stats.memory,
  `reaction-tenant-one-PT4-time` = stats.many
)
eventsin.stats <- list(
  Incoming = stats.few,
  memory = stats.memory
)
broker.stats <- list(
  memory = stats.memory,
  events = stats.few
)

# Services that will actually be fetched; each entry must have its metric
# table defined above.
services <- list(
  engine = engine.stats,
  notification = notification.stats,
  rest = eventsin.stats,
  broker = broker.stats
)
# Download one PNG chart and one CSV per service/metric/stat combination,
# write each CSV to "All Data", and build `data.merged`: the Date column of
# the first series followed by one value column per series.
#
# Output files are addressed with full paths instead of setwd(), so the
# working directory is never changed and cannot be left pointing at a
# sub-folder when a download fails half-way through.
charts.dir <- file.path(dir.current, "Charts")
csv.dir <- file.path(dir.current, "All Data")

# Pieces of the merged table, collected here and bound once after the loop.
merged.columns <- list()

for (service in names(services)) {
  for (metric in names(services[[service]])) {
    for (stat in services[[service]][[metric]]) {
      data.name <- paste(service, metric, stat, sep = ".")
      target <- paste(test.name, data.name, sep = ".")
      print(data.name) # Visual indicator

      # Download the graph. mode = "wb" keeps the PNG intact on Windows,
      # where the default text mode would corrupt binary content.
      url.png <- paste0(
        graphite.ip, from, until, target,
        "&width=800&height=600", "&format=png"
      )
      download.file(
        url.png,
        file.path(charts.dir, paste0(data.name, ".png")),
        quiet = TRUE,
        mode = "wb"
      )

      # Download the CSV; the first column repeats the series name, drop it.
      url.csv <- paste0(graphite.ip, from, until, target, "&format=csv")
      data <- read.csv(
        url.csv,
        col.names = c("Data Name", "Date", data.name),
        header = FALSE
      )
      data[1] <- NULL

      # If a column has values of 2^31 or more, rewrite the data in millions.
      # any(..., na.rm = TRUE) is FALSE for empty or all-NA columns, without
      # the warning that max() would raise there.
      if (any(data[[2]] >= 2^31, na.rm = TRUE)) {
        data[2] <- data[2] / 10^6
      }

      # The first series contributes Date + values, later ones values only.
      if (length(merged.columns) == 0L) {
        merged.columns[[1L]] <- data
      } else {
        merged.columns[[length(merged.columns) + 1L]] <- data[2]
      }

      write.csv(
        data,
        file.path(csv.dir, paste0(data.name, ".csv")),
        row.names = FALSE
      )
    }
  }
}

if (length(merged.columns) == 0L) {
  stop("Nothing was downloaded: 'services' lists no statistics.",
       call. = FALSE)
}

data.merged <- do.call(cbind, merged.columns)
write.csv(
  data.merged,
  file.path(dir.current, "MergedData.csv"),
  row.names = FALSE
)

# Print summary of all statistics
# print(summary(data.merged))

# Print a mean and sd of all the value columns. drop = FALSE keeps a data
# frame even when only one series was fetched, so colMeans() still works.
value.columns <- data.merged[, -1, drop = FALSE]
print("Column Means:")
print(colMeans(value.columns, na.rm = TRUE))
print("Column Standard Deviations:")
print(vapply(value.columns, sd, numeric(1), na.rm = TRUE))
print("Download and merging complete.")
答案 0 :(得分:1)
我认为问题在于这是一个不断用cbind()拼接数据的大循环。更好的方法是编写一个函数,把内层循环中的所有代码都放进去,并以服务(service)、度量(metric)和统计项(stat)作为参数。我们把这个函数称为“process.stat”。它返回下载到的数据,或者任何你想要的内容。
首先,您需要提取service / metric / stat元组:
# Build one row per service/metric/stat combination from the nested
# `services` list (service name -> metric name -> vector of stat names).
# The original snippet referred to `df1`/`df2`, which were never defined;
# the intermediate tables are `mat1` and `mat2`. Columns are kept as
# character (stringsAsFactors = FALSE) so they can index `services` directly.

# One column (service)
mat1 <- data.frame(service = names(services), stringsAsFactors = FALSE)

# List (one entry per service name) of service/metric pairs
list1 <- lapply(mat1$service, function(service) {
  expand.grid(
    service = service,
    metric = names(services[[service]]),
    stringsAsFactors = FALSE
  )
})

# Two columns (service and metric)
mat2 <- do.call(rbind, list1)

# List (one entry per service/metric pair) of service/metric/stat tuples.
# USE.NAMES = FALSE keeps the list unnamed so rbind() yields plain row names.
list2 <- mapply(
  function(service, metric) {
    expand.grid(
      service = service,
      metric = metric,
      stat = services[[service]][[metric]],
      stringsAsFactors = FALSE
    )
  },
  mat2$service,
  mat2$metric,
  SIMPLIFY = FALSE,
  USE.NAMES = FALSE
)

# Three columns (service, metric, and stat)
tuples <- do.call(rbind, list2)
然后你会使用apply系列中的某些东西来调用你想要处理的每个服务/度量/统计组合上的process.stat:
# Call process.stat once per row of `tuples`. apply() coerces the data frame
# to a matrix first, so each call receives a single row as one vector holding
# the service, metric and stat, in that order.
# NOTE(review): process.stat is not defined in this snippet; it is the helper
# the answer asks the reader to write. The shape of `data.merged` depends on
# what that helper returns, because apply() may simplify the results.
data.merged <- apply(tuples, 1, process.stat)
答案 1 :(得分:1)
通配符! Graphite URL API支持使用基于Perl的正则表达式,允许您使用通配符查询度量标准树。
如果我有以下内容 -
stats.A.A
stats.A.B
stats.A.C
stats.B.A.1
stats.B.A.2
stats.B.A.3
stats.C.B.C.D.1
stats.C.B.C.D.2
stats.C.B.C.D.3
stats.C.B.C.D.4
然后 group(stats.*.*,stats.*.*.*,stats.*.*.*.*) 会匹配所有这些内容。另一个有趣的功能是 groupByNode。