我正在尝试开发一种对邻近站点进行子采样的方法,并希望在一个邮政编码区域内定义一种系统的采样方式。我想先从两类既有站点(许可证已签发(Issued)和已关闭(Closed)的站点)中各抽取一个子样本,然后在同一区域内随机选取其他地点(住宅站点),同时确保这些新地点在空间上分布于整个邮政编码区域。但是,已签发站点(n = 411)和已关闭站点(n = 111)的数量并不相同。我想减少站点总数,使每种类型(已签发、已关闭、住宅)各抽取 50 个样本。
目前,我有一个 .csv 文件,其中已签发(Issued)站点和已关闭(Closed)站点的数量不平衡。基本上,我想找到一种方法,从这两个现有组中各子采样 n = 50 个站点,然后在整个区域内再随机选取 50 个新站点作为住宅站点。
在 R 中实现这一点的最佳方法是什么?我设想的做法是:围绕这些点创建一个多边形,将该多边形划分为网格,然后以某种方式在每个网格单元中选取 X 个点;或者对这些点施加某种距离约束,使它们分布在整个空间区域内。
非常欢迎任何有关如何编程实现的建议。特别是:如何从同一区域内样本量不同的既有组中各子采样相同数量的点(n = 50),以及如何随后在同一区域内采样新的点。
如果有帮助,下面是用于重建该数据框截断版本的代码。
# Truncated sample of the site data, written as a literal R expression that
# rebuilds a data.frame with 99 rows (row.names = c(NA, -99L)).
#
# Columns:
#   Status    - factor; integer code 1 = "Closed", 2 = "Issued"
#   Latitude  - numeric latitude values (presumably decimal degrees - confirm)
#   Longitude - numeric longitude values (presumably decimal degrees - confirm)
#   MatchType - factor whose only level is "Exact"
#
# Several coordinate pairs occur more than once in the data.
# NOTE: the expression is not assigned to a name here; to use it, assign the
# result yourself, e.g. `sites <- structure(...)`.
structure(list(Status = structure(c(2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Closed",
"Issued"), class = "factor"), Latitude = c(39.80296601, 39.80296601,
39.80331267, 39.802663, 39.80229056, 39.79744005, 39.79522843,
39.82467616, 39.79685336, 39.81087553, 39.79853855, 39.80349194,
39.79411796, 39.8182613, 39.79827808, 39.79632788, 39.79757134,
39.7937869, 39.7937869, 39.7937869, 39.81068301, 39.8101471,
39.7899709, 39.7902486, 39.78945589, 39.81034547, 39.81034547,
39.81703066, 39.81703066, 39.79430008, 39.81105806, 39.79654563,
39.81794504, 39.80747075, 39.7920428, 39.79573206, 39.80331195,
39.79421744, 39.79421744, 39.79421744, 39.81092431, 39.79119664,
39.81795999, 39.80579969, 39.80579969, 39.80531193, 39.79239147,
39.79239147, 39.79020706, 39.81813963, 39.81589411, 39.81450741,
39.81399073, 39.79360784, 39.79090567, 39.79551664, 39.79484417,
39.79064696, 39.80808188, 39.81120283, 39.82225852, 39.81309107,
39.79205945, 39.79160906, 39.79819709, 39.81669606, 39.80669405,
39.82445799, 39.80618515, 39.80133089, 39.790538, 39.81088506,
39.81070264, 39.82302047, 39.82239344, 39.82195393, 39.8037712,
39.8037712, 39.78954707, 39.79336712, 39.79889109, 39.81577414,
39.81577414, 39.8026805, 39.79189907, 39.80044429, 39.79811807,
39.8056941, 39.81823759, 39.81823759, 39.81925829, 39.8055675,
39.79951699, 39.81309351, 39.81309351, 39.8145066, 39.8214231,
39.82004781, 39.82409804), Longitude = c(-86.10367106, -86.10367106,
-86.10379883, -86.1029006, -86.10450493, -86.13134001, -86.13257417,
-86.12220837, -86.11433257, -86.06539075, -86.1074237, -86.06860451,
-86.10828268, -86.0728389, -86.12567673, -86.08301562, -86.11224302,
-86.11619645, -86.11619645, -86.11619645, -86.10255898, -86.06531268,
-86.09327353, -86.11616688, -86.06429333, -86.07919922, -86.07919922,
-86.13219974, -86.13219974, -86.10768731, -86.07003855, -86.10648678,
-86.13454252, -86.10296843, -86.13029975, -86.13287993, -86.10408973,
-86.10700698, -86.10700698, -86.10700698, -86.09641333, -86.11974575,
-86.13110523, -86.10996257, -86.10996257, -86.10930109, -86.12277044,
-86.12277044, -86.07969044, -86.13228667, -86.13497616, -86.12734099,
-86.12779891, -86.11335095, -86.12371895, -86.13258082, -86.13256532,
-86.08654295, -86.10133712, -86.07256574, -86.07459924, -86.10241967,
-86.12975226, -86.12779223, -86.13402745, -86.09476272, -86.10532194,
-86.12220672, -86.10455018, -86.10524494, -86.1293721, -86.10035669,
-86.10054898, -86.08318231, -86.1104644, -86.12203668, -86.12662631,
-86.12662631, -86.11478914, -86.13093994, -86.10274423, -86.07734542,
-86.07734542, -86.1094572, -86.12910019, -86.11281164, -86.08961054,
-86.10811797, -86.10940327, -86.10940327, -86.09937068, -86.10454236,
-86.10275384, -86.10221365, -86.10221365, -86.1276222, -86.11297687,
-86.11327227, -86.08072096), MatchType = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = "Exact", class = "factor")), class = "data.frame", row.names = c(NA,
-99L))