R中的HistogramTools包用法

时间:2014-08-09 19:32:53

标签: r histogram

我用下面的脚本生成了两个直方图,但它们的x轴并不一致,因此无法对它们进行比较。我应该怎样修改才能让脚本正确运行?有什么解决这个问题的思路吗?

谢谢

# First sample vector to be histogrammed: non-negative integer values.
x<-c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 
      2, 6, 5, 13, 12, 15, 27, 34, 37, 58, 85, 90, 111, 131, 161, 164, 
      191, 211, 267, 293, 288, 320, 364, 370, 379, 413, 429, 473, 546, 
      539, 551, 593, 614, 594, 644, 617, 599, 605, 531, 591, 524, 482, 
      470, 437, 446, 428, 384, 368, 331, 332, 320, 317, 295, 266, 286, 
      284, 342, 360, 394, 480, 502, 600, 547, 610, 524, 545, 497, 414, 
      381, 345, 351, 371, 326, 336, 341, 336, 324, 346, 360, 386, 368, 
      396, 428, 432, 434, 438, 513, 498, 452, 452, 403, 397, 407, 405, 
      460, 515, 541, 608, 522, 542, 514, 517, 551, 661, 669, 739, 805, 
      847, 921, 1031, 965, 973, 1030, 1043, 815, 818, 648, 520, 433, 
      338, 295, 162, 106, 70, 44, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

# Second sample vector to be histogrammed: non-negative integer values.
y<-c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 
      1, 1, 6, 4, 11, 4, 9, 15, 12, 34, 40, 49, 75, 65, 107, 132, 136, 
      157, 178, 189, 217, 278, 276, 296, 323, 435, 464, 473, 581, 613, 
      705, 820, 925, 1025, 1061, 1080, 1176, 1236, 1166, 1075, 1027, 
      976, 935, 807, 697, 658, 593, 440, 408, 347, 312, 296, 242, 284, 
      260, 243, 254, 283, 291, 371, 444, 470, 607, 719, 676, 722, 644, 
      678, 650, 662, 666, 607, 621, 558, 623, 634, 634, 699, 756, 771, 
      790, 852, 893, 1011, 1048, 1010, 966, 936, 860, 791, 681, 686, 
      752, 850, 952, 1049, 1094, 1134, 1156, 1198, 1351, 1342, 1533, 
      1461, 1271, 1065, 865, 739, 534, 459, 359, 275, 169, 124, 108, 
      80, 74, 64, 69, 61, 59, 56, 60, 76, 113, 102, 132, 101, 79, 92, 
      55, 41, 26, 17, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
      0, 0, 0)

# Build one histogram object per vector using hist()'s default break selection.
# NOTE(review): each call chooses its breaks from its own data, so h1 and h2
# may end up with different bins -- presumably the x-axis mismatch the question
# describes; confirm by comparing h1$breaks with h2$breaks.
h1 <- hist(x)
h2 <- hist(y)

# Load the package that provides the histogram distance functions used below.
require(HistogramTools) 

# Dissimilarity measures between the two histograms: Minkowski distance with
# p = 1, 2 and 3, then intersection distance, Kullback-Leibler divergence and
# Jeffrey divergence.
minkowski.dist(h1, h2, 1)
minkowski.dist(h1, h2, 2)
minkowski.dist(h1, h2, 3)
intersect.dist(h1, h2)
kl.divergence(h1, h2)
jeffrey.divergence(h1, h2)

1 个答案:

答案 0 :(得分:2)

两个向量(x和y)都有256个观测值。这些观察结果是否配对,即每个x都有相应的y,它们具有相同的测量单位?

如果是,您可以用一个向量减去另一个向量,并绘制差值的直方图来比较两者,如下所示:

# Paired comparison: x and y must have the same number of observations so that
# the element-wise subtraction lines up pair by pair.
length(x)  # check number of observations in x
length(y)  # check number of observations in y

# Element-wise difference between x and y. Stored under its own name instead
# of `diff`, which would mask the base R function diff().
xy_difference <- x - y

# Histogram of the paired differences; axis label and title are set explicitly,
# so the plot does not depend on the variable's name.
hist(xy_difference, xlab = "x-y", main = "Difference of vectors x and y")

如果x和y不能配对,但它们具有相同的测量单位,那么一种选择是:
在两个直方图上设置相同的坐标轴范围和相同的分组断点(breaks),以便在视觉上相互比较。

# Lay the plotting device out as 1 row x 2 columns so the two histograms are
# drawn next to each other.
par(mfrow = c(1, 2))

# Both panels use the same bin edges (0 to 1600 in steps of 100) and the same
# axis limits, and both suppress the title, so they can be compared by eye.
hist(x,
     breaks = seq(from = 0, to = 1600, by = 100),
     xlim = c(0, 1600),
     ylim = c(0, 200),
     main = "")
hist(y,
     breaks = seq(from = 0, to = 1600, by = 100),
     xlim = c(0, 1600),
     ylim = c(0, 200),
     main = "")

也可以使用以下方法计算相异度指标:

# Re-bin both vectors on the same explicit breaks (0 to 1600 in steps of 100)
# and keep the returned histogram objects for the distance calculations.
h1 <- hist(x,
           breaks = seq(from = 0, to = 1600, by = 100),
           xlim = c(0, 1600),
           ylim = c(0, 200),
           main = "")
h2 <- hist(y,
           breaks = seq(from = 0, to = 1600, by = 100),
           xlim = c(0, 1600),
           ylim = c(0, 200),
           main = "")

# Dissimilarity measures; the trailing numbers are the results quoted by the
# author of this snippet.
minkowski.dist(h1, h2, 1)  # 116
minkowski.dist(h1, h2, 2)  # 38.88
minkowski.dist(h1, h2, 3)  # 29.81
intersect.dist(h1, h2)     # 0.22

如果x和y具有不同的测量单位,则一种选择是在计算差异之前先对数据进行标准化(standardizing)。

# Standardize each vector to z-scores (subtract its mean, divide by its
# standard deviation) so vectors measured in different units become comparable.
x_standardized <- (x - mean(x)) / sd(x)
y_standardized <- (y - mean(y)) / sd(y)

# Bin both vectors on ONE shared set of breaks. Two separate hist() calls with
# default breaks each pick bins from their own data and can disagree, which
# makes the bin-by-bin distance measures below invalid (HistogramTools expects
# both histograms to share the same breaks -- see the package reference).
# The breaks follow hist()'s default recipe (Sturges bin count passed to
# pretty()), but are computed over the pooled range so they cover both vectors.
common_breaks <- pretty(range(c(x_standardized, y_standardized)),
                        n = nclass.Sturges(x_standardized),
                        min.n = 1)
h1 <- hist(x_standardized, breaks = common_breaks)
h2 <- hist(y_standardized, breaks = common_breaks)

# Dissimilarity measures; the trailing numbers are the results quoted in the
# original answer for this data.
minkowski.dist(h1, h2, 1)   # 58
minkowski.dist(h1, h2, 2)   # 26.57
minkowski.dist(h1, h2, 3)   # 22.1
intersect.dist(h1, h2)      # 0.11
kl.divergence(h1, h2)       # 0.07
jeffrey.divergence(h1, h2)  # 0.03