我有以下向量,它显示了变量可以采用的可能值。正如您所看到的,它不是用户友好的,我很难找到一种系统的方法来确定最小值和最大值。有没有人有什么建议?
[211] "-1\n1-960" "-1\n1-960"
[213] "-1\n1-960" "-1\n1\n2\n3"
[215] "-1\n0\n1\n\n2\n3\n\n4\n\n5" "-1\nF\nG\nH\nP\nR\nS\nU"
[217] "-1\n0\n1\n2\n3" "-1\n0\n1"
[219] "-1\n0\n1\n2\n3\n4\n5\n6" "-1\n0-255"
[221] "-1\n0-255" "-1\n0-255"
[223] "-1\n0-255" "-1\n0-255"
[225] "-1\n0\n0.01–0.99\n1\n1.01–99.99" "-1\n0\n1\n2\n3\n4\n5\n\n6\n\n7\n8\n\n9\n10\n11\n12"
[227] "-1\n0\n1\n\n2\n\n3\n4\n5\n\n6" "-1\n0\n1\n2\n\n3\n\n4\n5\n6"
值 "-1\n1-960" 指的是 1 到 960 之间的可能取值范围。-1 不代表任何含义,应当忽略;所有字母也应当忽略。
例如:
"-1\n1-960"
"-1\n0\n1\n\n2\n\n3\n4\n5\n\n6" "-1\n0\n1\n2\n\n3\n\n4\n5\n6"
应该导致:
max min
960 1
6 0
6 0
答案 0 :(得分:1)
删除前导的 -1 之后,可以按换行符进行拆分。然后,由于 - 表示范围,还可以按 - 字符进行拆分,因为它两侧的两个数字正好给出了范围的最小值和最大值。下面是一些代码:
# For each element of `dat`: drop the leading "-1" sentinel, split on newlines
# and on the range separators "-" and en dash (\u2013), keep only purely
# numeric tokens (this discards blank lines and letter codes), and take the
# range of the remaining values as numbers.
#
# Returns a list with one numeric c(min, max) pair per element, or c(NA, NA)
# when an element contains no numeric values at all.
#
# The conversion to numeric matters: on character tokens range() compares
# lexically, which gives results such as "" as the minimum or "9" ranking
# above "12". The values are therefore printed as numbers, not quoted strings.
lapply(
  strsplit(
    gsub("^-1\n", "", dat),
    "\n|-|\u2013"
  ),
  function(x) {
    nums <- as.numeric(x[grepl("^[0-9]+(\\.[0-9]+)?$", x)])
    if (length(nums) == 0L) c(NA_real_, NA_real_) else range(nums)
  }
)
[[1]]
[1] "1" "960"
[[2]]
[1] "1" "960"
[[3]]
[1] "1" "960"
[[4]]
[1] "1" "3"
[[5]]
[1] "" "5"
[[6]]
[1] "F" "U"
[[7]]
[1] "0" "3"
[[8]]
[1] "0" "1"
[[9]]
[1] "0" "6"
[[10]]
[1] "0" "255"
[[11]]
[1] "0" "255"
[[12]]
[1] "0" "255"
[[13]]
[1] "0" "255"
[[14]]
[1] "0" "255"
[[15]]
[1] "0" "1.01–99.99"
[[16]]
[1] "" "9"
[[17]]
[1] "" "6"
[[18]]
[1] "" "6"
答案 1 :(得分:0)
用一些额外的代码来扩展我的评论,这些代码可能算是、也可能不算是部分答案:
我猜-255是某种缺失值标记。其中一些字符值(目前)可以在R中解析为“数字”值,但如果您尝试解析其他值,则会抛出错误。您对1-960的期望是什么?这是一个表达式,所以既不是数字也不是字符。
# Candidate-value strings, one element per variable. Values are separated by
# newlines; some entries are ranges (e.g. "1-960") or letter codes.
dat <- c(
  "-1\n1-960", "-1\n1-960",
  "-1\n1-960", "-1\n1\n2\n3",
  "-1\n0\n1\n\n2\n3\n\n4\n\n5", "-1\nF\nG\nH\nP\nR\nS\nU",
  "-1\n0\n1\n2\n3", "-1\n0\n1",
  "-1\n0\n1\n2\n3\n4\n5\n6", "-1\n0-255",
  "-1\n0-255", "-1\n0-255",
  "-1\n0-255", "-1\n0-255",
  "-1\n0\n0.01–0.99\n1\n1.01–99.99",
  "-1\n0\n1\n2\n3\n4\n5\n\n6\n\n7\n8\n\n9\n10\n11\n12",
  "-1\n0\n1\n\n2\n\n3\n4\n5\n\n6", "-1\n0\n1\n2\n\n3\n\n4\n5\n6"
)
# Parse every string as numbers. scan(text = ) is used instead of
# scan(textConnection(x)) so that no connection is left open per element.
# simplify = FALSE keeps the result a list named by the input strings.
scandat <- sapply(
  dat,
  function(x) try(scan(text = x, quiet = TRUE), silent = TRUE),
  simplify = FALSE
)
# Entries that are not purely numeric fail to parse; wrapping the scan call in
# try lets it continue and leaves a "try-error" object in their place.
# So these are the items that could be parsed as numeric:
> # Keep only the elements that scan() parsed into numeric vectors
> scandat[vapply(scandat, is.numeric, logical(1))]
$`-1\n1\n2\n3`
[1] -1 1 2 3
$`-1\n0\n1\n\n2\n3\n\n4\n\n5`
[1] -1 0 1 2 3 4 5
$`-1\n0\n1\n2\n3`
[1] -1 0 1 2 3
$`-1\n0\n1`
[1] -1 0 1
$`-1\n0\n1\n2\n3\n4\n5\n6`
[1] -1 0 1 2 3 4 5 6
$`-1\n0\n1\n2\n3\n4\n5\n\n6\n\n7\n8\n\n9\n10\n11\n12`
[1] -1 0 1 2 3 4 5 6 7 8 9 10 11 12
$`-1\n0\n1\n\n2\n\n3\n4\n5\n\n6`
[1] -1 0 1 2 3 4 5 6
$`-1\n0\n1\n2\n\n3\n\n4\n5\n6`
[1] -1 0 1 2 3 4 5 6
我不打算去清理它,不过你可以把那些怪异的名称替换成别的东西,这样打印出来会更美观:
> # Per-element min/max of the entries that parsed as numeric. The leading -1
> # is still part of each vector here, which is why minx is -1 everywhere.
> sapply(
+   Filter(is.numeric, scandat),
+   function(x) list(minx = min(x), maxx = max(x))
+ )
-1\n1\n2\n3 -1\n0\n1\n\n2\n3\n\n4\n\n5 -1\n0\n1\n2\n3 -1\n0\n1 -1\n0\n1\n2\n3\n4\n5\n6
minx -1 -1 -1 -1 -1
maxx 3 5 3 1 6
-1\n0\n1\n2\n3\n4\n5\n\n6\n\n7\n8\n\n9\n10\n11\n12 -1\n0\n1\n\n2\n\n3\n4\n5\n\n6 -1\n0\n1\n2\n\n3\n\n4\n5\n6
minx -1 -1 -1
maxx 12 6 6