我有一个包含不同数值的数组,想借助一个阈值参数计算出一个百分比,用来表示所有元素之间的相似程度。
数组可能如下所示:
// Sample inputs from the question; each trailing comment states the score the asker expects.
var array = [42.98, 42.89, 42.91, 42.98, 42.88] // should return nearly 100%
var array = [42.98, 22.89, 42.91, 42.98, 42.88] // should return maybe 80%
var array = [42.98, 332.89, 122.91, 5512.98, -12.88] // should return nearly 0%
也就是说,100% 表示所有元素都相同,0% 表示元素完全不同,并且应该可以通过修改阈值来调整结果。
我真的不知道该如何解决这个问题(我完全是个新手),下面是我目前写出的全部代码,但它显然不能按预期工作:
/**
 * The asker's attempt at a similarity score (reported in the question as not
 * working as intended; kept here with its behaviour unchanged).
 *
 * For every index of `array` it adds `mean * index` to a running total, where
 * `mean` is the arithmetic mean of the elements, and finally scales that total
 * by `threshold / 100`.
 *
 * @param {number[]} array - Values to score.
 * @param {number} threshold - Percentage applied to the accumulated total.
 * @returns {number} The accumulated total multiplied by `threshold / 100`.
 */
function checkSimilarity(array, threshold) {
  const total = array.reduce((acc, value) => acc + value, 0);
  let accumulated = 0;
  // keys() visits every index from 0 to length - 1, exactly like an index loop.
  for (const index of array.keys()) {
    accumulated += (total / array.length) * index;
  }
  return accumulated * (threshold / 100);
}
如果有人能帮我写出一个可行的算法,我将非常感激!
答案 0 :(得分:1)
这是一种略有不同的方法。它绝不是最高效的,但确实适用于您的示例数据。
https://codepen.io/anon/pen/RMWjRL?editors=0010
// Sample inputs taken from the question; trailing comments give the asker's expected scores.
const array1 = [42.98, 42.89, 42.91, 42.98, 42.88]; // should return nearly 100%
const array2 = [42.98, 22.89, 42.91, 42.98, 42.88]; // should return maybe 80%
const array3 = [42.98, 332.89, 122.91, 5512.98, -12.88]; // should return nearly 0%
/**
 * Scores how similar the values of `arr` are by bucketing.
 *
 * Every value is rounded to the nearest integer with `Math.round`; the score
 * is the share of elements that fall into the most populated bucket.
 *
 * @param {number[]} arr - Values to compare.
 * @returns {number} Fraction between 0 and 1 (1 means every value rounds to
 *   the same integer). Returns 0 for an empty array.
 */
const similarity = (arr) => {
  // Guard: without it an empty input would compute 1 / 0 and return Infinity.
  if (arr.length === 0) {
    return 0;
  }

  // Bucket -> number of elements that round to it. A Map avoids clashes with
  // inherited object properties and keeps the keys numeric.
  const counts = new Map();
  arr.forEach((item) => {
    const bucket = Math.round(item);
    counts.set(bucket, (counts.get(bucket) || 0) + 1);
  });

  // Size of the most populated bucket.
  let largest = 0;
  counts.forEach((count) => {
    if (count > largest) {
      largest = count;
    }
  });

  return largest / arr.length;
};
// Log the score for each sample array; the trailing comments show the output and why.
console.log(similarity(array1)); // 1 (all five values round to 43)
console.log(similarity(array2)); // 0.8 (four of the five values round to 43)
console.log(similarity(array3)); // 0.2 (every value rounds to a different integer)
答案 1 :(得分:0)
// Sample inputs taken from the question; trailing comments give the asker's expected scores.
var array1 = [42.98, 42.89, 42.91, 42.98, 42.88] // should return nearly 100%
var array2 = [42.98, 22.89, 42.91, 42.98, 42.88] // should return maybe 80%
var array3 = [42.98, 332.89, 122.91, 5512.98, -12.88] // should return nearly 0%
/**
 * Scores how tightly the values in `data` cluster around their mean.
 *
 * For each item the absolute percentage deviation from the mean is computed
 * (`|100 - item / mean * 100|`). The arithmetic mean of those deviations is
 * the dissimilarity, and the result is `100 - dissimilarity`.
 *
 * The deviations are averaged with a true arithmetic mean, so the score does
 * not depend on the order of the elements and a deviation of exactly 0 is
 * counted like any other value.
 *
 * @param {number[]} data - Values to compare.
 * @returns {number} 100 when all values are equal; lower values mean more
 *   spread. The result is not clamped and becomes negative when the mean
 *   deviation exceeds 100%. Returns 0 for an empty array. When the mean of
 *   `data` is 0 the percentage deviations are undefined and the result is
 *   NaN or non-finite.
 */
function calculateRange(data) {
  // Nothing to compare: report no similarity instead of NaN.
  if (data.length === 0) {
    return 0;
  }

  const sum = data.reduce((a, b) => a + b, 0);
  const mean = sum / data.length;

  // Total of every item's absolute percentage deviation from the mean.
  const totalDiff = data.reduce(
    (acc, item) => acc + Math.abs(100 - (item / mean) * 100),
    0
  );

  // Subtract the mean dissimilarity from 100%.
  return 100 - totalDiff / data.length;
}
// Look up the three output elements by id.
var array1DOM = document.getElementById("array1");
var array2DOM = document.getElementById("array2");
var array3DOM = document.getElementById("array3");
// Write each sample array's score into its matching element, in order.
[
  [array1DOM, array1],
  [array2DOM, array2],
  [array3DOM, array3],
].forEach(function (pair) {
  pair[0].innerHTML = calculateRange(pair[1]);
});
<!-- Output containers: the script looks these up by id and writes one score into each. -->
<div>
<div id="array1"></div>
<div id="array2"></div>
<div id="array3"></div>
</div>
简单来说,这个方案先计算每个元素相对于数据集平均值的百分比偏差,再把这些偏差汇总起来得到相似度。您会注意到,前两个数组按要求给出了接近 100% 和 80% 的结果。问题出现在最后一个数组上:由于该模型基于相对于均值的偏差,array3 中各个值彼此毫不相关,导致相异度得分非常高,最终结果变成了负值。
我无法解决这个问题,因为我无法猜出您的数据的最大差值是多少。如果该值已知,我可以用它对结果进行归一化,使返回值落在 0-100 的范围内。如果您永远无法知道最大差值,我建议的唯一可能的解决方案是:
如果您能提供有关此任务目的/背景的更多信息,我们可以给出更具体的建议。
答案 2 :(得分:0)
我使用欧几里得距离来解决这个问题。不过,我不确定这是否能满足您的需求。
/**
 * Scores a list by the Euclidean distance between consecutive elements.
 *
 * The squared differences of every adjacent pair are summed; the score is
 * `100 - sqrt(sum)`, floored at 0.
 *
 * @param {number[]} list - Values to compare, in order.
 * @returns {number} 0 for an empty list, 100 for a single element, otherwise
 *   `100 - sqrt(sum of squared adjacent differences)`, never below 0.
 */
const similarity = (list) => {
  const count = list.length;
  if (count < 1) return 0;
  if (count < 2) return 100;

  // Accumulate the squared difference of each element and its predecessor.
  let squaredTotal = 0;
  for (let i = 1; i < count; i++) {
    squaredTotal += Math.pow(list[i - 1] - list[i], 2);
  }

  const score = 100 - Math.sqrt(squaredTotal);
  if (score < 0) {
    return 0;
  }
  return score;
};
// Demo: reassign `list` to each sample and log its score (the answer's stated result is in the trailing comment).
let list = [];
console.log(similarity(list)); // 0%
list = [42.98, 42.89, 42.91, 42.98, 42.88];
console.log(similarity(list)); // ~99%
list = [42.98, 22.89, 42.91, 42.98, 42.88];
console.log(similarity(list)); // ~71%
list = [10, 10, 10, 20, 10];
console.log(similarity(list)); // ~85%
list = [42.98, 332.89, 122.91, 5512.98, -12.88];
console.log(similarity(list)); // 0%
list = [45.51, 45.51, 45.51, 45.51, 45.51];
console.log(similarity(list)); // 100%
list = [10];
console.log(similarity(list)); // 100%