我有一个较大的 `data.table`，包含 `id` 和 `var` 两列：
# Preview the first six rows of DT; it has two columns, id and var.
head(DT)
# id var
# 1: 1 B
# 2: 1 C
# 3: 1 A
# 4: 1 C
# 5: 2 B
# 6: 2 C
我想创建一种交叉表，显示 `var` 的各种长度为 2 的组合在数据中出现的次数。
预期输出:
# Desired cross-table, shown here as a placeholder object `out`:
# rows and columns are the values of var, each cell is a count of ids,
# and the cells below the diagonal are left as NA.
out
# A B C
# A 0 3 3
# B NA 1 3
# C NA NA 0
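解释：
相同 `var` 的组合（例如 `B` - `B`）统计的是：有多少个 `id` 的所有 `var` 取值全部相同（全部为 `A`，或全部为 `B`，或全部为 `C`）。在示例数据中，`id` 4 只有一个条目 `B`，因此 `B` - `B` 在所需结果中为 1。
不同 `var` 的组合统计的是：两个特定的 `var` 同时出现在多少个 `id` 中，即组合 `A` - `B` 出现在 3 个 `id` 中，组合 `A` - `C` 和 `B` - `C` 也是如此。
对于任何一个 `id`，两个 `var`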
的单个组合只能是 0（不存在）或 1（存在），即我不想对同一个 `id` 重复计数多次。
DT <- structure(list(id = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L), var = c("B", "C", "A",
"C", "B", "C", "C", "A", "B", "B", "C", "C", "C", "C", "B", "C",
"B", "A", "C", "B")), .Names = c("id", "var"), row.names = c(NA,
-20L), class = "data.frame")
library(data.table)
setDT(DT, key = "id")
（只要包含相关信息，结果也可以以长格式给出。）
我确信一定有一种聪明（高效）的计算方法，但我目前想不出来。
示例数据:
<!-- Comment form: posts the fields "nama", "email" and "message" to prosescomment.php. -->
<form action="prosescomment.php" method="POST" id="form" >
  <!-- Confirmation banner; presumably revealed by a script that is not shown here. -->
  <div class="success_wrapper">
    <div class="success">Contact form submitted!<br>
    <strong>We will be in touch soon.</strong> </div>
  </div>
  <fieldset>
    <!-- Name field, submitted as "nama". -->
    <input type="text" name="nama" placeholder="Name:">
    <br class="clear">
    <!-- Validation messages; presumably toggled by client-side code not shown here. -->
    <span class="error error-empty">*This is not a valid name.</span><span class="empty error-empty">*This field is required.</span>
    <!-- E-mail field, submitted as "email". -->
    <label class="email">
      <input type="text" name="email" placeholder="E-mail:">
      <br class="clear">
      <span class="error error-empty">*This is not a valid email address.</span><span class="empty error-empty">*This field is required.</span> </label>
    <!-- Message body, submitted as "message". <textarea> has no type attribute, so the stray type="text" was removed. -->
    <label class="message">
      <textarea name="message" placeholder="Message"></textarea>
      <br class="clear">
      <span class="error">*The message is too short.</span> <span class="empty">*This field is required.</span> </label>
    <div class="clear"></div>
    <!-- Submit button; its name "submit" is what the server-side handler checks for. -->
    <div class="btns"><input type="submit" name="submit" value="send" class="link1"/>
      <div class="clear"></div>
    </div>
  </fieldset>
</form>
<?php
// Handles the comment form POST: stores name, e-mail and message in the
// `comment` table, then redirects to index.html on success.
// NOTE(review): connection.php is assumed to open the mysql_* connection used
// below — confirm. The mysql_* extension was removed in PHP 7; migrating to
// mysqli/PDO prepared statements needs the connection handle from that file.
include "connection.php";

if (isset($_POST['submit'])) {
    // Fall back to '' for missing fields instead of raising undefined-index notices.
    $nama    = isset($_POST['nama']) ? $_POST['nama'] : '';
    $email   = isset($_POST['email']) ? $_POST['email'] : '';
    $message = isset($_POST['message']) ? $_POST['message'] : '';

    // Escape every user-supplied value before it reaches the SQL string;
    // interpolating raw $_POST data allowed SQL injection.
    $query = sprintf(
        "INSERT INTO comment VALUES ('%s', '%s', '%s')",
        mysql_real_escape_string($nama),
        mysql_real_escape_string($email),
        mysql_real_escape_string($message)
    );
    $result = mysql_query($query);

    if ($result) {
        header("location:index.html");
        // Stop here so nothing else runs or is emitted after the redirect header.
        exit;
    } else {
        // The original called mysql_error() and discarded its return value;
        // record the failure in the server log instead.
        error_log("Comment insert failed: " . mysql_error());
    }
}
?>
答案 0 :(得分:10)
既然你可以接受长格式的结果：
# Step 1 (per id): when every var equals the first one, emit that value paired
# with itself; otherwise emit each pair of distinct sorted values from combn().
# Step 2: count how many ids produced each (V1, V2) pair.
DT[
  ,
  if (!all(var == var[1])) {
    as.data.table(t(combn(sort(unique(var)), 2)))
  } else {
    .(var[1], var[1])
  },
  by = id
][, .N, by = .(V1, V2)]
# V1 V2 N
#1: A B 3
#2: A C 3
#3: B C 3
#4: B B 1
或者，如果我们把上述输出命名为 `res`：
# Reshape the long result `res` (columns V1, V2, N) into a wide cross-table.
# 1. Join `res` onto every (V1, V2) pair of observed values; pairs that are
#    absent from `res` get N = NA.
# 2. Set the missing same-value (V1 == V2) cells to 0.
# 3. Cast V1 to rows and V2 to columns.
dcast(
  # The CJ() columns are named explicitly so the join does not rely on default names.
  res[CJ(V1 = c(V1, V2), V2 = c(V1, V2), unique = TRUE), on = c("V1", "V2")][
    # N is an integer count, so assign 0L rather than the double 0.
    V1 == V2 & is.na(N), N := 0L
  ],
  V1 ~ V2,
  # State the value column instead of letting dcast() guess it.
  value.var = "N"
)
# V1 A B C
#1: A 0 3 3
#2: B NA 1 3
#3: C NA NA 0
`combn` 的一种替代做法是：
# Same counting as the combn() version, but pairs are built with a cross join:
# per id, either the single value paired with itself, or every (V1, V2) pair
# of unique values with V1 < V2. Then count the ids per pair.
DT[
  ,
  if (all(var == var[1])) {
    .(V1 = var[1], V2 = var[1])
  } else {
    # Name the CJ() columns explicitly: newer data.table releases auto-name
    # CJ() columns after their inputs, so `V1 < V2` would otherwise fail.
    CJ(V1 = var, V2 = var, unique = TRUE)[V1 < V2]
  },
  by = id
][, .N, by = .(V1, V2)]
# V1 V2 N
# 1: A B 3
# 2: A C 3
# 3: B C 3
# 4: B B 1
# Or combn() with list output (instead of a matrix):
# drop duplicate (id, var) rows, sort by var, then per id emit either the single
# value paired with itself or all 2-combinations, and count the ids per pair.
unique(DT, by = NULL)[
  order(var),
  if (.N == 1L) {
    # Explicit names keep both branches on V1/V2; a bare .(var, var) can be
    # auto-named after `var` by newer data.table releases.
    .(V1 = var, V2 = var)
  } else {
    # transpose() returns an unnamed list of two vectors; name them to match.
    setNames(transpose(combn(var, 2, simplify = FALSE)), c("V1", "V2"))
  },
  by = id
][, .N, by = .(V1, V2)]