我已经被这个问题困扰了一段时间,希望有人能帮帮我。我有一个如下所示的数据集:
(此处原有的数据集示例在提取时丢失;下方的表格即为输入数据。)
我想创建一个表:对于 Col1 中每个唯一的取值(A / B / C),返回“Value”列中有值的那一行;如果该组没有任何值(如 C),则返回该组的第一行。输入数据如下(期望的输出见答案中的结果):
Col1 Date Value
A 2019-01-01 NA
A 2019-01-02 4
A 2019-01-03 NA
B 2019-02-01 NA
B 2019-02-02 NA
B 2019-02-03 1
C 2019-03-01 NA
C 2019-03-02 NA
C 2019-03-03 NA
答案 0 :(得分:2)
我们可以按 'Col1' 分组(`group_by`),然后对每组使用 `slice` 取一行。
{
"versionInfo": {
"implementation": {
"name": "spring-cloud-dataflow-server",
"version": "2.6.3"
},
"core": {
"name": "Spring Cloud Data Flow Core",
"version": "2.6.3"
},
"dashboard": {
"name": "Spring Cloud Dataflow UI",
"version": "2.5.1"
},
"shell": {
"name": "Spring Cloud Data Flow Shell",
"version": "2.6.3",
"url": "https://repo1.maven.org/maven2/org/springframework/cloud/spring-cloud-dataflow-shell/2.6.3/spring-cloud-dataflow-shell-2.6.3.jar"
}
},
"featureInfo": {
"streamsEnabled": true,
"tasksEnabled": true,
"schedulesEnabled": true,
"grafanaEnabled": false,
"wavefrontEnabled": false
},
"securityInfo": {
"isAuthenticationEnabled": false,
"isAuthenticated": false,
"username": null,
"roles": []
},
"runtimeEnvironment": {
"appDeployer": {
"platformSpecificInfo": {},
"deployerImplementationVersion": "2.5.2",
"deployerName": "Spring Cloud Skipper Server",
"deployerSpiVersion": "2.5.2",
"javaVersion": "11.0.6",
"platformApiVersion": "",
"platformClientVersion": "",
"platformHostVersion": "",
"platformType": "Skipper Managed",
"springBootVersion": "2.2.8.RELEASE",
"springVersion": "5.2.7.RELEASE"
},
"taskLaunchers": [
{
"platformSpecificInfo": {},
"deployerImplementationVersion": "2.4.1",
"deployerName": "LocalTaskLauncher",
"deployerSpiVersion": "2.4.1",
"javaVersion": "11.0.6",
"platformApiVersion": "Windows 10 10.0",
"platformClientVersion": "10.0",
"platformHostVersion": "10.0",
"platformType": "Local",
"springBootVersion": "2.2.8.RELEASE",
"springVersion": "5.2.7.RELEASE"
}
]
},
"grafanaInfo": {
"url": "",
"token": "",
"refreshInterval": 15
},
"monitoringDashboardInfo": {
"url": "",
"token": "",
"refreshInterval": 15,
"source": "default-scdf-source"
}
}
按 'Col1' 分组后,用 `slice` 在每组中取一行:`if` 该组所有的 'Value' 都是 `NA`,则返回第一行;否则返回第一个非 `NA` 值所在的行。
-输出
library(dplyr)
# For every Col1 group keep exactly one row: the first row whose Value is
# not NA, or the group's first row when every Value in the group is NA.
# match() gives the position of the first FALSE in is.na(Value);
# nomatch = 1L supplies the fallback index for all-NA groups.
df1 %>%
  group_by(Col1) %>%
  slice(match(FALSE, is.na(Value), nomatch = 1L))
# A tibble: 3 x 3
# Groups: Col1 [3]
# Col1 Date Value
# <chr> <chr> <int>
#1 A 2019-01-02 4
#2 B 2019-02-03 1
#3 C 2019-03-01 NA
或者,另一个选择是先用 `arrange` 排序(把 'Value' 非 `NA` 的行排在前面),然后用 `slice` 取每组的第一行:
# Alternative: sort rows by group, placing rows with a non-NA Value first
# (is.na() is FALSE for them, and FALSE sorts before TRUE), then keep the
# first row of each Col1 group.
df1 %>%
  group_by(Col1) %>%
  arrange(Col1, is.na(Value)) %>%
  filter(row_number() == 1L)