我有一个看起来像这样的数据框:
# A tibble: 9 x 5
# Groups: group [3]
group year value1 value2 value3
<int> <dbl> <int> <int> <int>
1 1 2000 NA 3 4
2 1 2001 8 3 4
3 1 2002 4 3 NA
4 2 2000 NA NA 1
5 2 2001 9 NA 1
6 2 2002 1 NA NA
7 3 2000 NA 5 NA
8 3 2001 9 5 NA
9 3 2002 NA 5 NA
我需要一个脚本,返回每一列中第一个和最后一个非 NA 值所对应的年份(不区分组)。理想情况下,输出应如下所示。注意,实际的数据集会大得多。
start end
value 1 2001 2002
value 2 2000 2002
value 3 2000 2001
答案 0 :(得分:2)
我们可以重塑为'long'格式,然后按'name'和<?php
// Insert one submitted form row into dbo.form.
require_once 'connection.php'; // expected to provide the PDO handle $conn -- TODO confirm

// Read the submitted fields; a missing or empty (falsy) field becomes null.
$fName = filter_input(INPUT_POST, 'fName') ?: null;
$lName = filter_input(INPUT_POST, 'lName') ?: null;
$email = filter_input(INPUT_POST, 'email') ?: null;
$tempCheck1 = filter_input(INPUT_POST, 'defaultCheck1') ?: null;
$tempCheck2 = filter_input(INPUT_POST, 'defaultCheck2') ?: null;
$tempCheck3 = filter_input(INPUT_POST, 'defaultCheck3') ?: null;
$tempCheck4 = filter_input(INPUT_POST, 'defaultCheck4') ?: null;

// The column list names a single `category` column, so the ticked checkboxes
// are collapsed into one comma-separated value. The original statement bound
// seven placeholders against four columns, which the database rejects.
// NOTE(review): confirm that a comma-separated list is the intended storage
// format for `category`.
$checkedCategories = array_filter(
    [$tempCheck1, $tempCheck2, $tempCheck3, $tempCheck4],
    function ($value) {
        return $value !== null;
    }
);
$category = $checkedCategories ? implode(',', $checkedCategories) : null;

// The string literal is now terminated; it was left open in the original,
// which made the whole script a parse error.
$sqlInsert = "INSERT INTO dbo.form (fName, lName, email, category) VALUES (:fName, :lName, :email, :category)";

// commit()/rollback() require an active transaction; start one unless the
// connection script has already done so.
if (!$conn->inTransaction()) {
    $conn->beginTransaction();
}

$stmt = $conn->prepare($sqlInsert);
$stmt->bindParam(':fName', $fName);
$stmt->bindParam(':lName', $lName);
$stmt->bindParam(':email', $email);
$stmt->bindParam(':category', $category);

// The boolean is returned to whichever script included this file.
if ($stmt->execute()) {
    $conn->commit();
    return true;
} else {
    $conn->rollback();
    return false;
}
?>
进行分组,然后用 summarise 求出 'year' 的 min 和 max:
或者使用library(dplyr)
library(tidyr)
library(tibble)
# Earliest and latest year with a non-missing observation for each value
# column, irrespective of group. The result is a data frame with one row per
# value column (as row names) and the columns `start` and `end`.
df1 %>%
  # Drop any existing grouping first: on a grouped tibble, select(-group)
  # cannot remove the grouping column and re-adds it with a message.
  ungroup() %>%
  select(-group) %>%
  # Long format with one row per (year, column) pair; NA observations dropped.
  pivot_longer(cols = starts_with("value"), values_drop_na = TRUE) %>%
  group_by(name) %>%
  summarise(start = min(year), end = max(year)) %>%
  column_to_rownames("name")
# start end
#value1 2001 2002
#value2 2000 2002
#value3 2000 2001
或者使用 data.table 中的 melt:
或者我们也可以使用library(data.table)
# Melt the value columns to long format (rows with NA values are dropped),
# then take the earliest and latest year for each original column.
# setDT() converts df1 to a data.table by reference.
melt(
  setDT(df1),
  id.vars = c("year", "group"),
  na.rm = TRUE
)[, .(start = min(year), end = max(year)), by = .(variable)]
或者我们也可以使用 summarise_at:
# For each value column, collect range() of the years at which the column is
# non-missing, then reshape to long format: two rows per column, holding the
# first and then the last year.
df1 %>%
  # Remove any grouping so the range is taken over the whole table rather
  # than once per group.
  ungroup() %>%
  # across() replaces the superseded summarise_at(); the lambda wraps the
  # length-2 range in a list so that summarise() gets one cell per column.
  summarise(across(
    starts_with("value"),
    ~ list(range(year[!is.na(.x)]))
  )) %>%
  unnest(everything()) %>%
  pivot_longer(everything())
答案 1 :(得分:2)
一个基础 R 的解决方案:对 value1 至 value3 各列,找出其非 NA 值所对应的 year。
# Base R: earliest and latest year with a non-missing value for every column
# whose name starts with "value". Returns a data frame with one row per value
# column and the numeric columns `start` and `end`.
data.frame(t(vapply(
  grep("^value", names(df1), value = TRUE),
  function(col) {
    # Use min/max over the years themselves rather than the first/last row
    # position: rows are ordered by group, so the last non-NA row does not
    # necessarily hold the latest year.
    observed_years <- df1$year[!is.na(df1[[col]])]
    if (length(observed_years) == 0L) {
      # A column that is entirely NA has no start or end year.
      return(c(start = NA_real_, end = NA_real_))
    }
    c(start = min(observed_years), end = max(observed_years))
  },
  numeric(2)
)))
答案 2 :(得分:1)
另一个 data.table 方案。不确定是否推荐使用 names(.SD),但它的扩展性应该很好:
library(data.table)
# For every column whose name starts with "value", report the column name
# together with the earliest and latest year at which it is non-missing.
# setDT() converts df1 to a data.table by reference; `start` and `end` are
# returned as list columns because they are built with lapply().
setDT(df1)[
  ,
  .(
    val = names(.SD),
    start = lapply(.SD, function(col) min(year[!is.na(col)])),
    end = lapply(.SD, function(col) max(year[!is.na(col)]))
  ),
  .SDcols = startsWith(names(df1), "value")
]
val start end
1: value1 2001 2002
2: value2 2000 2002
3: value3 2000 2001