我一直在做一个文本挖掘项目。我已经完成了 LDA 主题建模,现在得到了各文档的主题概率。我想使用 cluster 包计算文档之间的欧氏距离,以便据此创建网络图,但一直报错。也非常欢迎任何关于可视化方法的建议 :)
# Load the cluster package, which provides daisy().
library(cluster)
# NOTE: this call fails with the error shown below. The data frame still
# contains the character column `Filename` (column 1), so daisy() cannot treat
# every variable as numeric for the Euclidean metric.
FundDist <- as.matrix(daisy(EUTopicNetworks, metric = "euclidean", stand = TRUE))
Error in daisy(EUTopicNetworks, metric = "euclidean", stand = TRUE) : invalid type character for column numbers 1
In addition: Warning messages:
1: In data.matrix(x) : NAs introduced by coercion
2: In daisy(EUTopicNetworks, metric = "euclidean", stand = TRUE) :
with mixed variables, metric "gower" is used automatically
3: In min(x) : no non-missing arguments to min; returning Inf
4: In max(x) : no non-missing arguments to max; returning -Inf
我以前从未用 dput() 函数在本站上传过可重现的数据,希望我做对了。下面是我复制粘贴的输出。感谢您抽出宝贵时间阅读我的问题。
EUTopicNetworks <- structure(list(Filename = c("AT_Burenland_2007.txt", "AT21_Kaernten_07.txt",
"AT12_LowerAustria_07_13.txt", "AT_Nat_2007.txt", "AT34_Salzburg_07.txt",
"AT22_Steiermark_07.txt", "AT36_Tirol_07.txt", "UpperAustria2007.txt",
"AT13_Vienna_07.txt", "vorarlberg2007.txt", "AT_Austria_1.txt",
"AT11_Burgenland_1", "lowe austria 2014.txt", "AT13_Vienna2_14.txt",
"AT21_Kaernten_14.txt", "AT22_Steiermark_14.txt", "AT31_UpperAustria_14.txt",
"AT35_Salzburg_14.txt", "AT36_Tirol_14.txt", "AT37_Vorarlberg_14.txt",
"abbruzzo2007-2013.txt", "calabria2007-2013.txt", "campania2007-2013.txt",
"emiliaromagna2007-2013.txt", "sicily2007.txtt", "friuli2007-2013.txt",
"lazio2007-2013.txt", "liguria2007.txt", "lombardy2007-2013.txt",
"piemonte2007-2013.txt", "puglia2007-2013.txt", "sardinia2007-2013.txt",
"Bolzano_07.txt", "umbria 2007-2013.txt", "valledaosta 2007-2013.txt",
"tuscany2007.txt", "VENETO2007-2013.txt", "abruzzo2014-2020.txt",
"basilicata2014-2020.txt", "calabria2014-2020.txt", "campania2014-2020.txt",
"emiliaromagna2014-2020.txt", "sicily2014.txt", "friuli2014-2020.txt",
"lazio2014.txt", "liguria2014.txt", "lombardia2014-2020.txt",
"piemonte2014-2020.txt", "puglia_14.txt", "sardinia2014.txt",
"Bolzano_14.txt", "umbria2014.txt", "valledaosta 2014-2020.txt",
"tuscany2014.txt", "molise_14.txt", "molise_07.txt", "trento2007.txt",
"trento2014.txt", "ITALIANSTRATEGICPLAN2007-2013.txt", "italyinnovationstrategy2014-2020.txt",
"veneto2014-2020.txt", "aquitanie2014-2020.txt", "aquitanie2007.txt",
"auvergne2014-2020.txt", "auvergne_07.txt", "bretagne2014-2020.txt",
"bretagne_07.txt", "centre2014-2020.txt", "centre2007.txt", "champagne-ardenne 2007.txt",
"champagne-ardenne 2014.txt", "PICARDIE2007.txt", "picardie2014.txt",
"bassenormandie 2007.txt", "bassenormandie 2014.txt", "bourgogne2014.txt",
"bourgogne_07.txt", "midi-pyrenees2007.txt", "midipyrennes14.txt",
"franche-comte2014-2020.txt", "franche-comte_2007.txt", "hautenormandie2007.txt",
"hautenormandie2014-2020.txt", "limousine2014-2020.txt", "limousine2007.txt",
"loire2007.txt", "loire2014-2020.txt", "lorraine2014-2020.txt",
"lorraine2007.txt", "nordpasdecalais2007.txt", "nordpasdecalais2014-2020.txt",
"rhonealpes2014-2020.txt", "rhone-alpes2007.txt", "poitou-charenter2007.txt",
"poituou-charentes2014.txt", "corse2007.txt", "corsica.txt",
"bretagne_07.txt", "bretagne2014-2020.txt", "Baden-Wu_07.txt",
"Baden-wu14.txt", "bavaria2007.txt", "BAVARIA_14.txt", "BERLIN2014-2020.txt",
"Berlin_07.txt", "bradenburgh2014.txt", "Bradenburgh2007.txt",
"bremen2007.txt", "bremen2014.txt", "hamburg_07.txt", "HAMBURGO2014-2020.txt",
"Hessen_07.txt", "Hessian1.txt", "LowerSaxony2_07.txt", "LOWERSAXONY2014-2020.txt",
"Mecklenburg_07.txt", "MECKPOMM2014-2020.txt", "rheinland2014-2020.txt",
"RhinelanPlatz_07.txt", "saarland2014-2020.txt", "saarland_07.txt",
"sachsen-anhalt2014-2020.txt", "sachsen-anhelt2007.txt", "saxony_07.txt",
"saxony_14.txt", "Schleswig-Holstein2020.txt", "Schleswig-Holstein_07.txt",
"thuringia2007.txt", "THURINGIA2014-2020.txt", "Andalucia_2007-2013.txt",
"Andalusia_14.txt", "Aragon_14.txt", "Aragon_2007.txt", "Asturias_2007.txt",
"ES12_Asturias.txt", "Baleares_2007.txt", "Balears_14.txt", "Canarias_07.txt",
"Canaries_14.txt", "Cantabria_2007.txt", "ES13_Cantabria_14.txt",
"Castillala_Mancha_2007.txt", "ES42_Castilla-la_mancha.txt",
"CastillayLeon_dic_2007.txt", "ES41_Castilla-Leon.txt", "ES51_Catalonia_14.txt",
"catalonia2007.txt", "Madrid_2007-13.txt", "Madrid_14.txt", "Murcia_14.txt",
"murcia2007.txt", "Valencia_14.txt", "Valenciana_2007.txt", "laRioja2007.txt",
"CombiEngland_07.txt", "EastWales_07.txt", "NorthernIreland_07.txt",
"Scotland_07.txt", "WestWales_07.txt", "EastWales_14.txt", "England_14.txt",
"Northern_Ireland14.txt", "Scotland14.txt", "Westwales_14.txt",
"malta2007-2013.2.txt", "malta2014-2020.txt2.txt"), Funds = c(0.028649302,
0.036198106, 0.041060412, 0.036543709, 0.047044295, 0.01659907,
0.019221094, 0.056763265, 0.052615278, 0.045216842, 0.048176521,
0.038976137, 0.027341846, 0.037721688, 0.049252945, 0.05918185,
0.05440539, 0.017412537, 0.029307636, 0.022385126, 0.019737738,
0.027626844, 0.0334503, 0.043976555, 0.042856083, 0.021046234,
0.018061427, 0.014983543, 0.067145641, 0.019741648, 0.019018285,
0.030614714, 0.019666862, 0.028158874, 0.026009936, 0.019330949,
0.023088856, 0.044273539, 0.021168401, 0.017627883, 0.030486684,
0.017509486, 0.034035728, 0.034106673, 0.043486846, 0.029087254,
0.050564915, 0.047219925, 0.051437475, 0.029694445, 0.008588781,
0.045469371, 0.060967658, 0.049260664, 0.015106536, 0.026186649,
0.023254401, 0.053579943, 0.031056644, 0.045125396, 0.057680642,
0.01125217, 0.042532521, 0.041545015, 0.047940862, 0.036641552,
0.072252939, 0.035679102, 0.067488953, 0.008492444, 0.021052205,
0.020152732, 0.040564092, 0.02921307, 0.018565646, 0.022775302,
0.011711217, 0.019967731, 0.00877454, 0.022250866, 0.003696986,
0.011277284, 0.007740289, 0.02790784, 0.008134596, 0.014931457,
0.03269353, 0.041386999, 0.066164327, 0.011440048, 0.006215758,
0.010688796, 0.003811851, 0.003303556, 0.023094521, 0.010550119,
0.018023822, 0.022757839, 0.017667203, 0.02073341, 0.013537221,
0.011950717, 0.009010298, 0.019796088, 0.011314152, 0.01098032,
0.008832217, 0.040330019, 0.005822583, 0.006599734, 0.016338338,
0.013906508, 0.010973094, 0.010448791, 0.003723683, 0.013769165,
0.007583811, 0.009724543, 0.00237987, 0.005005899, 0.005048481,
0.013000829, 0.012671508, 0.003054379, 0.03508621, 0.012981055,
0.021982606, 0.009448894, 0.014883524, 0.018772709, 0.006068872,
0.018122102, 0.020449118, 0.015102835, 0.005449833, 0.011014679,
0.016602374, 0.006482356, 0.009969209, 0.002646448, 0.01205523,
0.04659564, 0.010866707, 0.0144986, 0.046946229, 0.028629168,
0.034634807, 0.059078927, 0.002919951, 0.016168915, 0.024403654,
0.09171777, 0.009978063, 0.015196456, 0.015174811, 0.047399696,
0.015303701, 0.011753077, 0.014862118, 0.01487099, 0.011742448,
0.018346786, 0.010785336, 0.010421162, 0.013791872, 0.026389358
), Biotech = c(0.024814541, 0.005668351, 0.017716491, 0.00853945,
0.015916015, 0.03888657, 0.001333459, 0.017368849, 0.023781704,
0.051278428, 0.005484117, 0.021759003, 0.027973849, 0.002774256,
0.005744201, 0.004244159, 0.00468969, 0.000581776, 0.022734494,
0.03445351, 0.000800523, 0.000362683, 0.026945766, 0.006823146,
0.005847249, 0.000630851, 0.020794353, 0.035979974, 0.006165474,
0.027793267, 0.00504312, 0.018927097, 0.000760576, 0.012289583,
0.002109001, 0.000442817, 0.000594334, 0.00037428, 0.06596126,
0.027988907, 0.019067461, 0.024872467, 0.015379713, 0.015295277,
9.36e-05, 0.000117979, 4e-05, 0.031220784, 0.001357913, 0.040951957,
0.000438858, 0.038880733, 0.00115553, 0.041152387, 0.042576251,
0.002254845, 0.022345729, 0.002596388, 0.022562024, 0.000243528,
0.000885187, 0.013339204, 0.001418329, 0.028089687, 0.002057198,
0.000244579, 0.000140129, 0.051721762, 0.014989271, 0.001673642,
0.04500578, 0.001615416, 0.00010688, 8.18e-05, 0.000526549, 0.024849247,
0.032961749, 0.033875354, 0.032145136, 0.012619383, 0.003522134,
0.012225185, 0.043464039, 0.077400519, 0.056308327, 0.020638077,
0.049992043, 0.038864222, 0.039459316, 0.034937031, 0.037406742,
0.029987413, 0.002413193, 0.000584526, 0.004584848, 0.012491496,
0.031710331, 0.017858395, 0.030812232, 0.003435739, 0.02648106,
0.006927007, 0.030785802, 0.044329986, 0.009838859, 0.002951219,
0.030722621, 0.020511401, 0.013623405, 0.081263322, 0.029623712,
0.003790876, 0.00335598, 0.018842609, 0.008430911, 0.032611226,
0.057455638, 0.004304486, 0.015733474, 0.043981231, 7.95e-05,
0.004054158, 0.045173701, 0.016378658, 0.015906368, 2.92e-05,
0.00057313, 0.00079682, 0.013209159, 0.039911915, 0.000237856,
0.022373161, 0.015821272, 0.026750309, 0.048698356, 0.041430357,
0.00287091, 0.007965338, 0.034481633, 0.001543219, 0.022152119,
0.041801127, 0.017463336, 0.038010604, 0.050393079, 0.045031199,
0.043613378, 0.037411148, 0.00186188, 0.018962051, 0.043254408,
0.018666636, 0.027696462, 0.024293257, 0.062711642, 0.000519461,
0.001056595, 0.031300324, 0.024742217, 0.024718682, 0.000780182,
0.01862668, 0.000973041, 0.000542227, 0.001011475, 0.011077226
), Transfers = c(0.00473547, 0.00038783, 0.000424567, 0.000695775,
0.000135175, 0.010334213, 0.000106781, 0.003008423, 0.000608193,
0.010326284, 0.000934925, 0.031277279, 0.00572826, 0.000260722,
0.001021529, 0.000154104, 0.000220061, 4.32e-05, 0.018335222,
0.013011634, 2.49e-05, 4.83e-05, 0.021935677, 0.000390414, 0.000130749,
3.77e-05, 0.009460382, 0.146681735, 7.44e-05, 0.082389135, 0.000592343,
0.000562132, 1.53e-05, 0.020403948, 1.31e-05, 2.46e-05, 5.51e-05,
0.000321357, 0.037377138, 0.006516009, 0.022055996, 0.041838049,
0.002549792, 0.00271147, 8.55e-05, 0.001550897, 0.001094715,
0.002059784, 2.73e-05, 0.012813067, 9.84e-06, 0.009924993, 8.74e-05,
0.004619721, 0.013069859, 2.14e-05, 0.053722696, 5.79e-05, 0.006753522,
1.18e-05, 0.005116721, 0.000108002, 2.73e-05, 0.003596542, 2.79e-05,
0.00438903, 8.31e-05, 0.026310482, 0.001005592, 0.000428282,
0.049529581, 1.93e-05, 8.57e-05, 0.001610554, 9.92e-06, 0.094923027,
0.031919217, 0.13955002, 0.083229087, 0.000284159, 0.000267466,
0.000349366, 0.056697448, 0.049064161, 0.075636951, 0.004204928,
0.006115066, 0.007264789, 0.002044115, 0.043477142, 0.046506897,
0.082070827, 0.00035585, 0.010126049, 0.000178782, 0.000133394,
0.019258021, 9.19e-05, 0.069771158, 0.164961859, 0.030302868,
0.008376654, 0.095394069, 0.069931231, 0.000553351, 0.000544636,
0.095332857, 0.001748097, 0.000288915, 0.049584358, 0.095331287,
0.000598831, 0.001574565, 0.124263691, 3.34e-05, 0.107925558,
0.087354139, 0.000618826, 0.000110399, 0.035831715, 5.52e-06,
0.003000538, 0.076722556, 0.001625612, 0.00057855, 2.15e-05,
6.78e-05, 0.000268523, 0.000567245, 0.04113056, 1.71e-05, 0.03401376,
0.001848523, 0.029357767, 0.078771496, 0.05552954, 0.068487283,
0.001617493, 0.045003856, 0.000170027, 0.102169304, 0.033286348,
0.000645582, 0.123061518, 0.024437451, 0.002628661, 0.013120533,
0.002000205, 0.000545963, 0.103891281, 0.01547252, 0.004918401,
0.032767954, 0.084638687, 0.093356166, 0.000156201, 0.000752217,
0.109659324, 0.208642497, 0.208474925, 0.000404265, 0.078084401,
0.000538784, 0.012066067, 0.018067282, 0.000205862), Collab = c(0.030001488,
0.036707564, 0.01458121, 0.026231048, 0.018525526, 0.011553297,
0.058634057, 0.001686141, 0.001348074, 0.006757227, 0.013508918,
0.003715637, 0.002921306, 0.009278328, 0.004626478, 0.002879119,
0.055770088, 0.095661212, 0.017193222, 0.004260887, 0.0994825,
0.094794299, 0.00236101, 0.05708391, 0.070789976, 0.093534164,
0.001109712, 0.009766358, 0.033402635, 0.011669702, 0.06682796,
0.001608723, 0.076258585, 0.0177607, 0.081032098, 0.094412392,
0.105163053, 0.000130001, 0.000308904, 0.000673957, 0.000108183,
0.006185235, 0.001417778, 0.001392482, 0.001763266, 4.19e-05,
0.000316372, 0.000538187, 0.057255911, 0.000888558, 0.117687659,
0.002003037, 0.068194122, 0.000653657, 0.000152612, 0.089555908,
0.002829031, 0.032391752, 0.000114824, 0.001213285, 0.000386851,
0.015705495, 0.049863754, 0.000186015, 0.036288112, 0.000121075,
0.001514642, 0.00150885, 0.000594681, 0.139375952, 0.002323917,
0.075647519, 0.002870689, 3.77e-05, 0.077144908, 0.026437255,
0.000115174, 0.00227099, 0.004700389, 0.041492391, 0.122675327,
0.020817113, 6.89e-05, 0.000303617, 0.000137477, 0.001432608,
0.000184365, 0.001050974, 0.000709209, 0.000270104, 0.000303001,
0.018320147, 0.099247105, 0.082998488, 0.000888759, 0.016183068,
0.006294048, 0.002853816, 0.019514895, 0.038458183, 0.002923949,
0.106293548, 0.011739459, 0.000128574, 0.007004556, 0.114129525,
0.012154148, 0.00942754, 0.009594396, 1.79e-05, 0.003734627,
8.05e-06, 0.119908919, 0.018081544, 0.075305864, 0.008538072,
0.000172614, 0.011539718, 0.001156176, 2.3e-05, 0.06492041, 0.12754611,
0.00024379, 0.006267908, 0.00306844, 0.001193837, 0.013286424,
0.113241894, 0.00550093, 0.000513184, 0.164987722, 0.008430982,
0.01127053, 0.00073653, 0.000330426, 0.002238095, 0.104762755,
0.010050252, 0.000469937, 0.145991698, 0.016278919, 0.000640692,
0.005282822, 0.005445685, 0.00014593, 0.000589578, 0.003085291,
0.003763146, 0.118843056, 0.019891671, 0.007112815, 0.004553507,
0.014161345, 0.011043344, 1.65e-05, 0.05419503, 0.107074967,
0.01952576, 0.015831838, 0.015618949, 0.133629759, 0.016718132,
0.120940954, 0.072855599, 0.066799617, 0.006925232)), .Names = c("Filename",
"Funds", "Biotech", "Transfers", "Collab"), class = "data.frame", row.names = c(NA,
-166L))
答案 0 :(得分:0)
正如 @Cath 所提到的,问题出在文本列上(第一列 Filename 是字符型)。删除该列即可:
# Drop the first column (the character `Filename` column) so that only the
# numeric topic-probability columns are passed to daisy().
# drop = FALSE guarantees the result stays a data frame even if only a single
# column is left; without it, `[` would collapse the result to a bare vector.
EUTopicNetworks2 <- EUTopicNetworks[, -1, drop = FALSE]
# Sanity check: should print "data.frame".
class(EUTopicNetworks2)
library(cluster)
# Pairwise Euclidean dissimilarities between rows (documents), computed on
# standardised columns (stand = TRUE), then expanded into a full square matrix.
FundDist <- as.matrix(
  daisy(EUTopicNetworks2, metric = "euclidean", stand = TRUE)
)
答案 1 :(得分:0)
通过运行以下代码,我解决了自己在其中一条评论中提出的问题:
# Use the file names (column 1) as row names so that each row is identified by
# its document. Data-frame row names must be unique, and the posted data
# contain repeated file names (e.g. "bretagne_07.txt"), so assigning the raw
# column fails with "duplicate 'row.names' are not allowed". make.unique()
# keeps the first occurrence unchanged and appends ".1", ".2", ... to repeats.
row.names(EUTopicNetworks) <- make.unique(as.character(EUTopicNetworks[[1]]))
# Remove the character `Filename` column now that it lives in the row names.
# drop = FALSE keeps the result a data frame regardless of how many columns
# remain.
EUTopicNetworks <- EUTopicNetworks[, -1, drop = FALSE]
library(cluster)
# Pairwise Euclidean dissimilarities between rows (documents), computed on
# standardised columns (stand = TRUE), then expanded into a full square matrix.
FundDist <- as.matrix(
  daisy(EUTopicNetworks, metric = "euclidean", stand = TRUE)
)