我正在研究一个使用 Node.js 实现的小型机器学习理论算法。我的目标是将多个数组模式与一个源模式进行比较,然后以百分比的形式返回它们的相似程度。例如,pattern1 与源模式的相似度可能为 80%。
确定一个数组与另一个数组的百分比相似度的最佳方法是什么?
到目前为止我做了什么......
// Source sequence: the reference series that every candidate is compared against.
// NOTE(review): the identifier is spelled `soureSequence` (missing "c"); the
// question's later code refers to it by this exact spelling.
var soureSequence = [0.53,0.55,0.50,0.40,0.50,0.52,0.58,0.60]
// Candidate sequences to compare against the source.
// sequence1 differs from the source at indices 1 and 2.
var sequence1 = [0.53,0.54,0.49,0.40,0.50,0.52,0.58,0.60]
// sequence2 differs from the source at indices 3, 5, 6 and 7.
var sequence2 = [0.53,0.55,0.50,0.42,0.50,0.53,0.57,0.62]
由于我希望得到基于百分比的结果,我认为源模式应当由序列中每个值到下一个值的百分比变化构成。
/**
 * Computes the percent change from `firstVal` to `secondVal`, relative to
 * the magnitude of `firstVal`.
 *
 * Degenerate results (NaN, +/-Infinity, or exactly zero) are replaced by a
 * tiny positive placeholder so callers never receive a falsy or non-finite
 * value.
 *
 * @param {number} firstVal - Baseline value.
 * @param {number|string} secondVal - New value; parsed with `parseFloat`.
 * @returns {number} The percent change, or 0.00000001 for degenerate cases.
 */
const percentChange = (firstVal, secondVal) => {
  const pChange = ((parseFloat(secondVal) - firstVal) / Math.abs(firstVal)) * 100;
  // A plain truthiness test lets +/-Infinity through (e.g. firstVal === 0),
  // so finiteness is checked explicitly.
  if (!Number.isFinite(pChange) || pChange === 0) {
    return 0.00000001;
  }
  return pChange;
};
在这里,我将从源序列生成源模式
/**
 * Converts a numeric sequence into its pattern of consecutive percent
 * changes: element `i` of the result is the change from `sequence[i]` to
 * `sequence[i + 1]`.
 *
 * @param {number[]} sequence - Input values.
 * @returns {number[]} One entry per adjacent pair, i.e. `sequence.length - 1`
 *   entries (an empty array for sequences with fewer than two values).
 */
const storePattern = function (sequence) {
  const pattern = [];
  // Stop one short of the end: the last element has no successor, and
  // comparing it with `undefined` would only append a meaningless placeholder.
  for (let i = 0; i < sequence.length - 1; i++) {
    pattern.push(percentChange(sequence[i], sequence[i + 1]));
  }
  return pattern;
};
// Pattern produced by storePattern from the source sequence.
var sourcePattern = storePattern(soureSequence);
现在我将创建更多可供比较的模式
// Patterns produced by storePattern from the two candidate sequences.
var testPattern1 = storePattern(sequence1);
var testPattern2 = storePattern(sequence2);
以下是我的比较函数
/**
 * Rates how closely `target` follows `source`, as an average of per-index
 * similarity scores (100 minus the absolute percent change between the two
 * values at that index).
 *
 * Only indices present in both patterns are compared. The total is divided
 * by the longer length, so a length mismatch in either direction lowers the
 * rating instead of inflating it.
 *
 * @param {number[]} source - Reference pattern.
 * @param {number[]} target - Pattern being rated.
 * @returns {number} Average similarity; 0 when there is nothing to compare.
 *   Individual scores are not clamped, so very different values can pull the
 *   result below zero.
 */
const processPattern = function (source, target) {
  const comparedLength = Math.min(source.length, target.length);
  if (comparedLength === 0) {
    return 0;
  }
  let total = 0;
  for (let i = 0; i < comparedLength; i++) {
    // Percent change from the target value to the source value at this index.
    total += 100 - Math.abs(percentChange(target[i], source[i]));
  }
  return total / Math.max(source.length, target.length);
};
现在我可以尝试估算相似度
// Similarity rating of the first candidate pattern relative to the source pattern.
var similarityOfTest1 = processPattern(sourcePattern , testPattern1)
我的问题是,这种方法只适用于数值范围相同的序列。例如,0.50 → 0.52 的百分比变化与 0.20 → 0.22 的百分比变化并不相同,尽管两者的数值差相同,即 0.02。
我想过改为基于数值差来构建模式,但到这里我就没有头绪了。
将考虑所有答案。谢谢你的帮助!
答案 0 :(得分:1)
使用 reduce 先求出各元素的差值,然后再求平均值。
// Candidate sequences to compare against the source sequence that diff() uses.
var sequence1 = [0.53,0.54,0.49,0.40,0.50,0.52,0.58,0.60]
var sequence2 = [0.53,0.55,0.50,0.42,0.50,0.53,0.57,0.62]
/**
 * Computes the signed element-wise difference `source[i] - sequence[i]` for
 * every index of the source (including index 0), plus the mean of those
 * differences.
 *
 * Because the differences are signed, positive and negative deviations
 * cancel in `average`; inspect `delta` for the per-element picture.
 *
 * @param {number[]} sequence - Candidate sequence to compare.
 * @param {number[]} [source] - Reference sequence; defaults to the sample
 *   source sequence from the question.
 * @returns {{delta: number[], average: number}} Per-element differences and
 *   their mean (0 when the source is empty).
 */
function diff(sequence, source = [0.53,0.55,0.50,0.40,0.50,0.52,0.58,0.60]) {
  const delta = source.map((value, i) => value - sequence[i]);
  const sum = delta.reduce((a, b) => a + b, 0);
  const average = delta.length > 0 ? sum / delta.length : 0;
  return { delta, average };
}
// Print the object returned by diff() (its `delta` and `average`) for each candidate.
console.log('sequence1',diff(sequence1));
console.log('sequence2',diff(sequence2));
答案 1 :(得分:1)
根据我的经验,通常使用点积来衡量两个向量(数组)的相似性。正如链接中所说,将两个数组对应位置的元素相乘并求和,然后除以两个数组各自的模长(即各数组元素平方和的平方根)。Rosetta Code 上有一个 JavaScript 的点积示例,复制如下(注意:该示例只计算点积本身,并未做上述归一化):
// dotProduct :: [Number] -> [Number] -> Number | undefined
// Sum of the pairwise products of two equal-length arrays; yields
// undefined when the lengths differ.
const dotProduct = (xs, ys) => {
  if (xs.length !== ys.length) {
    return undefined;
  }
  const products = zipWith((x, y) => x * y, xs, ys);
  if (!products) {
    return undefined;
  }
  let total = 0;
  products.forEach((product) => {
    total += product;
  });
  return total;
};
// zipWith :: (a -> b -> c) -> [a] -> [b] -> [c]
// Combines xs and ys index by index with f; the result is as long as the
// shorter of the two inputs.
const zipWith = (f, xs, ys) => {
  const limit = ys.length;
  // Trim xs only when it is the longer array.
  const trimmed = xs.length > limit ? xs.slice(0, limit) : xs;
  return trimmed.map((x, index) => f(x, ys[index]));
};
所以,你可以这样调用:
// Dot products of the source sequence with each candidate sequence.
// The source variable in this file is spelled `soureSequence`; the previous
// `sourceSequence` reference was undefined and threw a ReferenceError.
// NOTE(review): these are raw dot products with no magnitude normalization,
// so a larger score alone may not mean a closer match — confirm intent.
const score1 = dotProduct(soureSequence, sequence1);
const score2 = dotProduct(soureSequence, sequence2);
得分较大的那个序列,就是与源序列更接近的序列。
答案 2 :(得分:1)
我不确定你是否需要机器学习。你有一个源模式,你有一些输入,你基本上想要执行模式的差异。
机器学习可用于查找模式,假设您有一些用于测量错误的启发式算法(如果您使用的是无监督学习技术),或者您有样本集来训练网络。
但是,如果您只是想测量一个模式和另一个模式之间的差异,那么只需执行差异操作即可。您需要做的是确定测量的差异以及如何将结果标准化。
答案 3 :(得分:1)
我不确定你究竟想以什么方式来衡量相似度。我的做法是计算对应元素之间的差值并将这些差值累加,再看累加结果相对于源数组元素之和的偏差有多大。你也可以按自己喜欢的方式来计算。
/**
 * Scores how similar `target` is to `source`: the summed absolute
 * element-wise differences are expressed as a percentage of the sum of the
 * source values and subtracted from 100.
 *
 * Implemented iteratively so long inputs neither overflow the call stack
 * nor re-copy the remaining tail at every step.
 *
 * @param {Iterable<number>} source - Reference values; these drive the iteration.
 * @param {Iterable<number>} target - Values compared index by index with the
 *   source. Extra target values are ignored; missing ones make the result NaN.
 * @param {{sumSource: number, sumDiff: number}} [state] - Running totals;
 *   callers normally omit this.
 * @returns {string} Text of the form "99.5215% similarity" (four decimals).
 *   The number part is NaN when the source is empty or its values sum to a
 *   zero-over-zero ratio.
 */
function check(source, target, state = {sumSource: 0, sumDiff: 0}) {
  // Spread once so any iterable is accepted, as with the destructuring form.
  const sourceValues = [...source];
  const targetValues = [...target];
  for (let i = 0; i < sourceValues.length; i++) {
    state.sumSource += sourceValues[i];
    state.sumDiff += Math.abs(sourceValues[i] - targetValues[i]);
  }
  return (100 - 100 * state.sumDiff / state.sumSource).toFixed(4) + "% similarity";
}
// Reference sequence followed by the two candidate sequences to score against it.
var soureSequence = [0.53,0.55,0.50,0.40,0.50,0.52,0.58,0.60],
sequence1 = [0.53,0.54,0.49,0.40,0.50,0.52,0.58,0.60],
sequence2 = [0.53,0.55,0.50,0.42,0.50,0.53,0.57,0.62];
// Print the similarity string returned by check() for each candidate.
console.log(check(soureSequence,sequence1));
console.log(check(soureSequence,sequence2));