Question

我的项目：

我正在遍历文件夹中的shapefile，并运行一些计算以在输出shapefile中添加具有新值的新列

我的问题：

第一次迭代的计算结果是正确的。但是，之后这些值会被原样作为列添加到后续的每个 shapefile 中（而不是在每次迭代中重新计算）。代码如下。运行此代码最终生成的列为：final_year、final_month、final_day、final_date。

我的代码：

library(rgdal)
library(tidyverse)
library(magrittr)
library(dplyr)

# Batch script: for every shapefile in input_path, derive final_year /
# final_month / final_day / final_date columns from the Length1 trend and
# attach them to the layer. The write-out step is currently commented out, so
# each iteration only prints the head of the result.

# Input and output folders; both end with "/" because paths are built by plain
# string concatenation below.
input_path<- "/Users/JohnDoe/Desktop/Zone_Fixup/Z4/Z4_Split/"
output_path<- "/Users/JohnDoe/Desktop/Zone_Fixup/Z4/Z4_Split_Out/"

# Names (not full paths) of all entries in input_path that end in ".shp".
files<- list.files(input_path, pattern = "[.]shp$")

for(f in files){

  # NOTE(review): the second argument of list.files() is `pattern`, so the file
  # name f is interpreted as a regular expression. This may return more than
  # one name (e.g. a sidecar such as "<name>.shp.xml") -- confirm the folder
  # contents; f itself is already the file name.
  ifile<- list.files(input_path, f)
  shp_paste<- paste(input_path, ifile, sep = "")
  # NOTE(review): if readOGR() fails, only "Error1." is printed and shp0 is not
  # reassigned, so the rest of the iteration runs on the shp0 left over from
  # the previous file (or fails on the first file, where shp0 does not exist).
  tryCatch({shp0<- readOGR(shp_paste, verbose=FALSE)}, error = function(e){print("Error1.")})


  #Order shapefile by filename
  # shp2 is a sorted data-frame copy; shp0 itself keeps its original row order.
  shp1<- as.data.frame(shp0)
  shp2<- shp1[order(shp1$filename),]

  #Sort final dates by relative length values.
  #If it's increasing, it's day1; if it's decreasing it's day3, etc.
  # Each row is compared with the previous row's Length1: larger than the
  # previous value selects the *1 column, otherwise the *3 column. Values are
  # converted to character by paste0().
  # NOTE(review): presumably lag() resolves to dplyr::lag (dplyr is attached),
  # which shifts by one position and leaves NA in the first element -- confirm.

  shp2$final_day1<- ifelse(lag(shp2$Length1)<shp2$Length1, paste0(shp2$day1), paste0(shp2$day3))
  shp2$final_month1<- ifelse(lag(shp2$Length1)<shp2$Length1, paste0(shp2$month1), paste0(shp2$month3))
  shp2$final_year1<- ifelse(lag(shp2$Length1)<shp2$Length1, paste0(shp2$year1), paste0(shp2$year3))

  #Remove first NA value of each column
  # The first row has no predecessor to compare against, so it falls back to
  # the row's own day1 / month1 / year1 value.
  if(is.na(shp2$final_day1[1])){
    ex1<- shp2$day1[1]
    ex2<- as.character(ex1)
    ex3<- as.numeric(ex2)  # computed but never used
    shp2$final_day1[1]<- ex2
  }
  if(is.na(shp2$final_month1[1])){
    ex4<- shp2$month1[1]
    ex5<- as.character(ex4)
    ex6<- as.numeric(ex5)  # computed but never used
    shp2$final_month1[1]<- ex5
  }
  if(is.na(shp2$final_year1[1])){
    ex7<- shp2$year1[1]
    ex8<- as.character(ex7)
    ex9<- as.numeric(ex8)
    # NOTE(review): unlike day and month above, the numeric value (ex9) is
    # assigned here rather than the character one (ex8).
    shp2$final_year1[1]<- ex9
  }

  #Add final dates to shapefile as new columns
  # NOTE(review): the values come from shp2, which was sorted by filename, but
  # they are assigned positionally into shp0, which was never reordered. Unless
  # shp0 is already in filename order, the new columns end up on the wrong rows.
  shp0$final_year<- shp2$final_year1
  shp0$final_month<- shp2$final_month1
  shp0$final_day<- shp2$final_day1
  # final_date is "<year>_<month>_<day>" built from the three columns above.
  final_paste<- paste(shp0$final_year, "_", shp0$final_month, "_", shp0$final_day, sep = "")
  shp0$final_date<- final_paste 

  #Create new shapefile for write out
  shp44<- shp0


  #Write out shapefile
  # Layer name = input name with its last four characters removed
  # (presumably the ".shp" extension).
  ifile1<- substring(ifile, 1, nchar(ifile)-4)
  #tryCatch({writeOGR(shp44, output_path, layer = ifile1, driver = "ESRI Shapefile", overwrite_layer = TRUE)}, error = function(e){print("Error2.")})
  # With the write-out disabled, show the first rows of the result instead.
  test1<- head(shp44)
  print(test1)

}

我的结果：以下是两个 head() 输出表。第一张表是正确的，第二张表不正确。请注意，两个表中的 final_year、final_month、final_day 和 final_date 列完全相同。注意：这些列是表格的最后四列。

表1：

           coordinates  Length1     Bathy Vector                               filename  zone year1 year2 year3 month1 month2 month3 day1 day2 day3 final_year final_month final_day final_date
1 (-477786.3, 1110917) 29577.64 -6.455580      0 Zone4_2000_02_05_2000_02_15_2000_02_24 Zone4  2000  2000  2000     02     02     02   05   15   24       1997          02        15 1997_02_15
2 (-477786.3, 1110917) 29577.64 -6.455580      0 Zone4_2000_02_24_2000_03_10_2000_03_17 Zone4  2000  2000  2000     02     03     03   24   10   17       1997          03        26 1997_03_26
3 (-477848.2, 1113468) 27025.88 -2.100153      0 Zone4_2000_03_24_2000_04_03_2000_04_10 Zone4  2000  2000  2000     03     04     04   24   03   10       1997          04        19 1997_04_19
4   (-477871, 1114406) 26087.98 -4.700025      0 Zone4_2006_03_10_2006_03_27_2006_04_03 Zone4  2006  2006  2006     03     03     04   10   27   03       1998          02        08 1998_02_08
5 (-477876.1, 1114616) 25877.25 -7.598877      0 Zone4_2008_03_06_2008_03_16_2008_03_25 Zone4  2008  2008  2008     03     03     03   06   16   25       1998          03        28 1998_03_28
6 (-477878.8, 1114730) 25764.14 -7.598877      0 Zone4_2008_03_30_2008_04_09_2008_04_23 Zone4  2008  2008  2008     03     04     04   30   09   23       1998          04        21 1998_04_21

表2：

coordinates  Length1     Bathy Vector                               filename  zone year1 year2 year3 month1 month2 month3 day1 day2 day3 final_year final_month final_day final_date
1 (-477813.5, 1110939) 29612.26 -6.455580      1 Zone4_2000_02_05_2000_02_15_2000_02_24 Zone4  2000  2000  2000     02     02     02   05   15   24       1997          02        15 1997_02_15
2 (-477813.5, 1110939) 29612.26 -6.455580      1 Zone4_2000_02_24_2000_03_10_2000_03_17 Zone4  2000  2000  2000     02     03     03   24   10   17       1997          03        26 1997_03_26
3 (-477883.4, 1113392) 27158.05 -2.100153      1 Zone4_2000_03_24_2000_04_03_2000_04_10 Zone4  2000  2000  2000     03     04     04   24   03   10       1997          04        19 1997_04_19
4 (-477909.9, 1114319) 26230.17 -4.700025      1 Zone4_2006_03_10_2006_03_27_2006_04_03 Zone4  2006  2006  2006     03     03     04   10   27   03       1998          02        08 1998_02_08
5 (-477916.7, 1114558) 25991.57 -7.598877      1 Zone4_2008_03_06_2008_03_16_2008_03_25 Zone4  2008  2008  2008     03     03     03   06   16   25       1998          03        28 1998_03_28
6 (-477920.1, 1114678) 25871.39 -7.598877      1 Zone4_2008_03_30_2008_04_09_2008_04_23 Zone4  2008  2008  2008     03     04     04   30   09   23       1998          04        21 1998_04_21

似乎我的代码正在从第一次迭代中获取列值，并在随后的迭代中将它们添加到shapefile中。如何修改我的代码以在每次迭代中运行新的计算，并将这些唯一值添加到它们各自的shapefile中？

谢谢

Answer 1

我认为您的问题可能出在您的for循环的开始。

# List every shapefile in the input folder (keep this line from the original
# code).
files <- list.files(input_path, pattern = "[.]shp$")

# Iterate by index. seq_along() yields no iterations when `files` is empty,
# whereas 1:length(files) would still run with f = 1 and f = 0.
for (f in seq_along(files)) {

  # Take the current name straight from `files` instead of calling
  # list.files(input_path, f), which treats its second argument as a regex
  # pattern. `ifile` must stay defined because the rest of the loop still uses
  # it to build the output layer name.
  ifile <- files[f]
  shp_paste <- paste0(input_path, ifile) # full path of the current shapefile

其余的代码保持原样，看看是否有帮助。

Answer 2

谢谢大家的帮助，我发现了问题。有点尴尬，我没有在添加新列之前按升序对文件名进行排序。因此，新列中的值似乎是错误的，因为它们与正确的行不匹配。感谢所有提供建议的人，这是我的笨拙错误。

R 中 for 循环的结果仅在第一次迭代时正确

2 个答案: