尝试根据大小将Shapefile分为多个部分。如果Shapefile的zip文件的文件大小超过10 MB,则会将其分成大块。
之前,我们使用Geopandas来完成此任务,虽然工作正常,但要消耗更多的RAM才能完成此任务。因此,请尝试使用PyShp库。
主要问题是它正在生成分割的文件,但是没有记录插入到Shapefile中,并且其中也缺少DBF文件。
我在以下代码中缺少什么吗,请告诉我
import os
import math
import csv
import zipfile
import shutil
from shutil import copyfile
import shapefile
path = '<shapefile_data_path>'
storage_path = '<path_to_extract_zip_file>'
current_dir = '<path_for_divided_shapefiles>'
ALLOWED_SIZE = 10
procs = []
# Here filepath means Shapefile's zip file path
def function_name(filepath):
file_name = file_path.split('/')[-1]
name = file_name.split('.zip')[0]
storage_file = os.path.join(storage_path, file_name)
storage_file = storage_file.replace('\\', '/')
src = path +'/'+file_path
shutil.copy(src,storage_file)
statinfo = os.stat(storage_file)
if (statinfo.st_size >> 20) > ALLOWED_SIZE:
storage_path_1 = storage_path + '/' + name
zip_ref = zipfile.ZipFile(storage_file)
zip_ref.extractall(storage_path_1)
zip_ref.close()
prj_file_path = ''
for _file1 in os.listdir(storage_path_1):
print _file1
if _file1.endswith('.prj'):
prj_file_path = os.path.join(storage_path_1, _file1)
for _file1 in os.listdir(storage_path_1):
if _file1.endswith('.shp'):
file_size = statinfo.st_size >> 20
parts = int(math.ceil(float(file_size) / float(ALLOWED_SIZE)))
# data = gpd.read_file(storage_path_1 + '/' + _file1)
data = shapefile.Reader(storage_path_1 + '/' + _file1)
records = data.records()
num_lines = len(data)
increment = int(num_lines / parts)
start_index = 0
part = 1
file_name_new = file_name.split('.zip')[0]
while start_index < num_lines:
part_name = '{1}_part{0}'.format(part, file_name_new)
outpath = os.path.join(current_dir, part_name)
os.mkdir(outpath)
outfile = os.path.join(outpath, part_name)
end_index = start_index + increment
if end_index > num_lines:
end_index = num_lines
chunk = records[start_index:end_index]
with open(outfile,'w') as f:
f.write(str(chunk))
copyfile(prj_file_path, os.path.join(outpath, file_name_new+'.prj'))
shutil.make_archive(outpath, 'zip', outpath)
shutil.rmtree(outpath)
start_index = end_index
part += 1
在写入新shapefile时,在生成的zip文件中面临困难。 DBF文件丢失,SHP文件没有正确的记录
非常感谢您的帮助。