我无法在SAS中解决这个问题,以下是我的数据:
表A:3列
var nodeoutlook = require('nodejs-nodemailer-outlook')
var nodemailer = require("nodemailer");
const promisify = require("es6-promisify");
// Send a single message through nodejs-nodemailer-outlook, with both an HTML
// and a plain-text body and one attachment read from disk.
// NOTE(review): the credentials below are hard-coded sample values; load real
// ones from configuration or the environment before using this for real mail.
nodeoutlook.sendEmail({
  auth: {
    user: "johnexample@organization.com",
    pass: "johnpassword",
  },
  from: 'info@myorganization.com',
  to: 'jackexample@organization.com',
  subject: 'Hey you, awesome!',
  html: '<b>This is bold text</b>',
  // The comma after this property was missing, which made the whole object
  // literal a syntax error.
  text: 'This is text version!',
  attachments: [
    {
      // file on disk as an attachment
      filename: 'text3.txt',
      path: '/path/to/file.txt', // stream this file
    },
  ],
});
/**
 * Creates a nodemailer test account, builds an SMTP transport for
 * smtp.ethereal.email with that account's credentials, and sends one sample
 * message containing both a plain-text and an HTML body.
 *
 * @returns {Promise<object>} Resolves with whatever `transporter.sendMail`
 *   resolves with; rejects if account creation or sending fails.
 */
async function main() {
  const account = await nodemailer.createTestAccount();

  const transporter = nodemailer.createTransport({
    host: "smtp.ethereal.email",
    port: 587,
    secure: false,
    auth: {
      user: account.user,
      pass: account.pass,
    },
  });

  const mailOptions = {
    // The original literal was unterminated ('"Fred ") and had no trailing
    // comma, so the file could not be parsed. The address part was missing;
    // foo@example.com is a placeholder - replace it with the real sender.
    from: '"Fred " <foo@example.com>', // sender address
    to: "bar@example.com, baz@example.com", // list of receivers
    subject: "Hello",
    text: "Hello world?", // plain text body
    html: "<b>Hello world?</b>", // html body
  };

  // Return the send result so callers can inspect it; it was previously
  // assigned to a local and then dropped.
  const info = await transporter.sendMail(mailOptions);
  return info;
}

// Any rejection from main() is logged rather than left unhandled.
main().catch(console.error);
ID变量是唯一的,而COUNT变量的离散范围是1-12。
表B:13列
ID COUNT MEAN
A 2 0.034
B 4 -0.052
C 7 0.327
.. .. ..
CLUSTER变量目前的取值范围是1-12,但这个范围并不固定,因此代码需要能够处理聚类数量不同的情况。
问题: 对于表A中的每一行,我想以COUNT变量为索引,在表B对应的MEAN(N)列的所有取值中找到与该行MEAN最接近的值,然后取出这个最接近匹配项所在行的CLUSTER编号。
例如,ID为'A'的行COUNT为2,MEAN为0.034,因此我需要在表B的MEAN2列的所有取值中找到与0.034最接近的值。对于12个可能的COUNT值及其对应的12个MEAN列,我都需要执行这样的查找。
数组和索引已超出我的理解范围,如能提供任何帮助,不胜感激。 布兰登
答案 0 :(得分:0)
如果将聚类均值转置为如下布局,查找会容易得多:
Cluster Count Mean
1 1 0.344
1 2 0.234
1 3 0.233
…
2 1 0.234
2 2 0.234
2 3 0.343
…
然后您可以使用SQL,为每个观测均值选出与之最接近的聚类均值:
/* Query fragment: for each observed id, pair its row with every clusters_tall
   row that has the same count, and keep the pairing(s) whose absolute
   difference in mean is the smallest for that id.
   delta is the signed difference (observed mean minus cluster mean).
   The HAVING clause compares a detail value with a group minimum, so it relies
   on PROC SQL remerging the summary statistic back onto the detail rows. */
select
id,
clusters_tall.cluster,
clusters_tall.mean as cluster_mean,
observed.mean - clusters_tall.mean as delta
from
observed
join
clusters_tall
on
observed.count = clusters_tall.count
group by
observed.id
having
/* keep only rows at the per-id minimum distance; tied clusters all survive */
abs(observed.mean - clusters_tall.mean) = min (abs (observed.mean - clusters_tall.mean))
示例:
* create some sample data for clusters;
* Writes 50 rows (cluster = 1 to 50) in wide form with variables mean1-mean12.
  For each cluster only the first k means are filled, where k is a random
  integer from 1 to 12, and the remaining means stay missing;
data clusters;
do cluster = 1 to 50;
array mean(12);
* all rows are written inside one pass of the step, so clear the values left
  over from the previous cluster before filling this one;
call missing (of mean(*));
* _n_ is reused as a scratch loop index and the upper bound is a random draw
  from 1 to 12;
do _n_ = 1 to ceil(12*ranuni(123));
* each filled mean is a uniform random value rounded to 4 decimals;
mean(_n_) = round(ranuni(123),0.0001);
end;
output;
end;
run;
* create some sample data for observed;
* 60% of the id will be given a mean close to a randomly selected (picked) existing cluster mean;
* Writes 1000 rows keeping id, count, mean, pick and flag.
  flag is * when the mean was derived from the picked cluster and R when the
  count and mean are purely random;
data observed (keep=id count mean pick flag);
length id count mean 8.;
do id = 1 to 1000;
* choose a random cluster row number from 1 to 50 and read that row directly;
pick = ceil(50*ranuni(123));
point = pick;
set clusters point=point;
* reset mean for this id so the do-until loop below starts from a missing value;
mean = .;
* with probability 0.6 keep drawing a count from 1 to 12 until the picked
  cluster has a non-missing mean at that count, then set mean to that cluster
  mean plus a small offset within about 0.0005 either side, rounded to 4 decimals;
if ranuni(123) < 0.6 then do until (not missing(mean));
array means mean1-mean12;
flag = '*';
count = ceil(12 * ranuni(123));
if not missing(means(count)) then
mean = means(count) + round((ranuni(123)-0.5) / 1e3,0.0001);
end;
else do;
* otherwise use a random count and a random mean rounded to 3 decimals and
  shifted by 0.0005;
flag = 'R';
count = ceil (12 * ranuni(123));
mean = round(ranuni(123),0.001) + 0.0005;
end;
output;
end;
* explicit stop is needed because reading with point= never reaches end of
  file, so the step would otherwise loop again;
stop;
run;
* Reshape the wide cluster data (one row per cluster holding mean1-mean12) into
  the tall cluster/count/mean layout, with one output row per filled mean.
  Scanning of a cluster stops at its first missing mean;
data clusters_tall(keep=cluster count mean);
  set clusters;
  array wide_means {*} mean1-mean12;
  do count = 1 to dim(wide_means);
    * the filled means are contiguous from mean1, so the first gap ends the row;
    if missing(wide_means{count}) then leave;
    mean = wide_means{count};
    output;
  end;
run;
* match the observed mean to the closest cluster mean;
* The inner join in the sub-select finds the closest cluster for each id, but
  more than one cluster can be tied at the same closest distance;
* Thus, left join to the sub-select and choose the one
  with the lowest cluster number in case of a tie;
* NOTE(review): no QUIT statement is visible after this query - confirm that
  PROC SQL is terminated further down;
proc sql;
create table observed_matched(label="Observed matched to cluster with closest mean") as
select observed.*, matched.cluster, matched.cluster_mean, matched.delta
from
observed
left join
(
/* sub-select: for each id, the cluster row(s) with the same count whose mean
   is nearest to the observed mean, where delta is observed minus cluster mean */
select
id,
clusters_tall.cluster,
clusters_tall.mean as cluster_mean,
observed.mean - clusters_tall.mean as delta
from
observed
join
clusters_tall
on
observed.count = clusters_tall.count
group by
observed.id
having
/* compares each row with its group minimum, which relies on PROC SQL
   remerging the summary value onto the detail rows */
abs(observed.mean - clusters_tall.mean) = min (abs (observed.mean - clusters_tall.mean))
) as matched
on observed.id = matched.id
group by observed.id
/* tie-break: of the equally close clusters keep the lowest cluster number */
having cluster = min(cluster)
order by id, cluster
;