Question

尝试根据大小将Shapefile分为多个部分。如果Shapefile的zip文件的文件大小超过10 MB，则会将其分成大块。

之前，我们使用Geopandas来完成此任务，虽然工作正常，但要消耗更多的RAM才能完成此任务。因此，请尝试使用PyShp库。

主要问题是它正在生成分割的文件，但是没有记录插入到Shapefile中，并且其中也缺少DBF文件。

我在以下代码中缺少什么吗，请告诉我

import os
import math
import csv
import zipfile
import shutil
from shutil import copyfile
import shapefile

path = '<shapefile_data_path>'
storage_path = '<path_to_extract_zip_file>'
current_dir = '<path_for_divided_shapefiles>'
ALLOWED_SIZE = 10
procs = []

# Here filepath means Shapefile's zip file path
def function_name(filepath):
    file_name = file_path.split('/')[-1]
    name = file_name.split('.zip')[0]
    storage_file = os.path.join(storage_path, file_name)
    storage_file = storage_file.replace('\\', '/')
    src = path +'/'+file_path
    shutil.copy(src,storage_file)
    statinfo = os.stat(storage_file)
    if (statinfo.st_size >> 20) > ALLOWED_SIZE:
        storage_path_1 = storage_path + '/' + name
        zip_ref = zipfile.ZipFile(storage_file)
        zip_ref.extractall(storage_path_1)
        zip_ref.close()
        prj_file_path = ''
        for _file1 in os.listdir(storage_path_1):
            print _file1
            if _file1.endswith('.prj'):
              prj_file_path = os.path.join(storage_path_1, _file1)

        for _file1 in os.listdir(storage_path_1):
            if _file1.endswith('.shp'):
                file_size = statinfo.st_size >> 20
                parts = int(math.ceil(float(file_size) / float(ALLOWED_SIZE)))
                # data = gpd.read_file(storage_path_1 + '/' + _file1)
                data = shapefile.Reader(storage_path_1 + '/' + _file1)
                records = data.records()
                num_lines = len(data)
                increment = int(num_lines / parts)
                start_index = 0
                part = 1
                file_name_new = file_name.split('.zip')[0]

            while start_index < num_lines:
                part_name = '{1}_part{0}'.format(part, file_name_new)
                outpath = os.path.join(current_dir, part_name)
                os.mkdir(outpath)
                outfile = os.path.join(outpath, part_name)
                end_index = start_index + increment
                if end_index > num_lines:
                  end_index = num_lines

                chunk = records[start_index:end_index]
                with open(outfile,'w') as f:
                    f.write(str(chunk))

                copyfile(prj_file_path, os.path.join(outpath, file_name_new+'.prj'))
                shutil.make_archive(outpath, 'zip', outpath)
                shutil.rmtree(outpath)
                start_index = end_index
                part += 1

在写入新shapefile时，在生成的zip文件中面临困难。 DBF文件丢失，SHP文件没有正确的记录

非常感谢您的帮助。

使用PyShp将Shapefile分为多个部分

0 个答案: