I'm writing a csv data importer for my web2py application that populates tables in a postgres db as a background task using the web2py scheduler.
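For context, the task is queued using the standard web2py Scheduler pattern, roughly like this (a simplified sketch; the file layout and the point where the task is queued are assumptions rather than my exact code):

# in a model file (sketch)
from gluon.scheduler import Scheduler
scheduler = Scheduler(db)

# in a controller, once an upload has been accepted (sketch)
scheduler.queue_task(vecdyn_bulk_importer)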
I think I need to close the open file to release the system resources it holds; I've noticed that the application slows down considerably after data has been uploaded to it. In my case I've tried calling stream.close() or csvfile.close(), but that seems to break the function. Maybe I'm overlooking something or missing some basic knowledge. This is the function I'm writing (the cleanup I attempted is sketched after it):
import csv

def vecdyn_bulk_importer():
    # reverse select by date; to be changed to select the oldest first
    dataset = db(db.data_set_bulk_upload.status == 'pending').select(
        orderby=~db.data_set_bulk_upload.submit_datetime).first()
    if dataset is not None:
        try:
            filename, csvfile = db.data_set_bulk_upload.csvfile.retrieve(dataset.csvfile)
            readCSV = csv.reader(csvfile, delimiter=',')
            next(readCSV, None)  # skip the header row
            # if any changes are made to the main collection template, they need to be reflected in the slices below
            for row in readCSV:
                # dict(zip(...)) builds a dictionary from two sequences: the field names and one slice of the csv row
                pubinfo = dict(zip(('title', 'dataset_citation', 'publication_citation',
                                    'description', 'url', 'contact_name', 'contact_affiliation',
                                    'email', 'orcid', 'dataset_license', 'project_identifier', 'publication_status'),
                                   row[:12]))
                # check whether there is a collection author name in the db.collection_author table; if not, insert it
                # if pubinfo.collection_author != None:
                #     db.collection_author.update_or_insert(name=pubinfo.collection_author)
                # check for a match in the db against the 'pubinfo' dict; note that this information
                # has a one-to-many relationship with the metadata table
                record_1 = db.publication_info(**pubinfo)
                publication_info_id = record_1.id if record_1 else db.publication_info.insert(**pubinfo)
                study = dict(zip(('taxon', 'location_description', 'study_collection_area', 'geo_datum',
                                  'gps_obfuscation_info', 'species_id_method', 'study_design', 'sampling_strategy',
                                  'sampling_method', 'sampling_protocol', 'measurement_unit', 'value_transform'),
                                 row[12:24]))
                # check for a match in the db against the 'study' dict
                record_2 = db.study_meta_data(**study)
                study_meta_data_id = record_2.id if record_2 else db.study_meta_data.insert(
                    publication_info_id=publication_info_id, **study)
                samples = dict(zip(('sample_start_date', 'sample_start_time',
                                    'sample_end_date', 'sample_end_time', 'sample_value', 'sample_sex',
                                    'sample_stage', 'sample_location', 'sample_collection_area', 'sample_lat_dd',
                                    'sample_long_dd', 'sample_environment', 'additional_location_info',
                                    'additional_sample_info', 'sample_name'), row[24:40]))
                time_series_data = db.time_series_data.insert(
                    study_meta_data_id=study_meta_data_id, publication_info_id=publication_info_id, **samples)
                db.commit()  # commit after each row so the import doesn't hold one huge transaction
            # add a send mailto here
        except Exception:
            db.rollback()
            dataset.update_record(status='failed')
            db.commit()
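For reference, this is roughly how I imagine the cleanup should look (a sketch; I'm assuming the stream returned by retrieve() is a file-like object whose close() releases the underlying file handle):

def vecdyn_bulk_importer():
    dataset = db(db.data_set_bulk_upload.status == 'pending').select(
        orderby=~db.data_set_bulk_upload.submit_datetime).first()
    if dataset is not None:
        csvfile = None
        try:
            filename, csvfile = db.data_set_bulk_upload.csvfile.retrieve(dataset.csvfile)
            # ... same row-by-row processing as above ...
        except Exception:
            db.rollback()
            dataset.update_record(status='failed')
            db.commit()
        finally:
            # assumption: retrieve() returns a file-like stream with a close() method
            if csvfile is not None:
                csvfile.close()

But when I add the close() call like this, the import seems to stop working.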