我正在尝试编写一个 API,对压缩包中的 CSV 记录使用 bulk_create
来批量写入大量数据行。但是,在调用 serializer.save()
之前先调用 serializer.is_valid()
来验证数据时,由于 sensorReading_device(指向 Device.device_id)
的外键约束,验证过程需要很长时间。
我尝试过 prefetch_related()
和 select_related(),
也尝试过嵌套的序列化器,性能相近甚至更差;我怀疑 ModelSerializer 在验证外键时会对每一行数据各发起一次数据库查询(即 N+1 次数据库往返)。唯一有效的办法是去掉模型中的外键、改用 CharField 实现,这样速度非常快,但也意味着不再有外键约束。移除外键是正确的做法,还是我遗漏了什么?
以下是我的代码,非常感谢任何建议!
serializer.py
class SensorReadingListSerializer(serializers.ListSerializer):
    """List serializer that persists a whole batch of readings at once."""

    def create(self, validated_data):
        """Instantiate one ``SensorReading`` per item and bulk-insert them.

        Args:
            validated_data: Iterable of mappings; each mapping is expanded
                into keyword arguments for ``SensorReading``.

        Returns:
            The result of ``SensorReading.objects.bulk_create`` for the
            constructed (previously unsaved) objects.
        """
        pending = []
        for attrs in validated_data:
            pending.append(SensorReading(**attrs))
        return SensorReading.objects.bulk_create(pending)
class SensorReadingSerializer(serializers.ModelSerializer):
    """Serializer for ``SensorReading`` rows.

    The owning device is supplied by primary key and checked against
    ``device_qs``. Lists (``many=True`` on the serializer) are handled by
    ``SensorReadingListSerializer``.
    """

    # Devices a reading may reference.
    device_qs = Device.objects.all()

    # The model column is a single-valued ForeignKey, so the field must accept
    # exactly one primary key. Declaring it with ``many=True`` would make it
    # expect a list per reading, which neither a scalar device id nor
    # ``SensorReading(**item)`` can satisfy.
    # NOTE(review): related-field validation resolves each submitted key
    # through ``queryset``; for very large batches that is presumably one
    # lookup per row -- profile before relying on it for bulk uploads.
    sensorReading_device = PrimaryKeyRelatedField(
        label='SensorReading device',
        queryset=device_qs,
    )

    class Meta:
        model = ReadingsModel.SensorReading
        fields = (
            'id',
            'device_timestamp',
            'server_timestamp',
            'payload',
            'sensorReading_device',
        )
        list_serializer_class = SensorReadingListSerializer
model.py
class Device(models.Model):
    """A registered device, keyed by its string ``device_id``."""

    # Primary key: caller-supplied identifier, at most 120 characters.
    device_id = models.CharField(max_length=120, primary_key=True)

    # Deleting the referenced DeviceType cascades to this device.
    device_deviceType = models.ForeignKey(
        DeviceType,
        on_delete=models.CASCADE,
    )

    # Deleting the referenced SoftwareVersion cascades to this device.
    device_softwareVersion = models.ForeignKey(
        SoftwareVersion,
        on_delete=models.CASCADE,
    )
class SensorReading(models.Model):
    """One reading reported by a ``Device``, stored as a JSON payload."""

    # NOTE(review): presumably ``datetime`` is the ``datetime.datetime`` class
    # here, so both defaults yield a naive local timestamp -- confirm against
    # the project's time-zone settings.
    device_timestamp = models.DateTimeField(default=datetime.today)
    server_timestamp = models.DateTimeField(default=datetime.today)

    payload = JSONField()

    # Deleting the device cascades to its readings. An earlier experiment
    # stored the device id in a plain ``CharField(max_length=120)`` instead of
    # a foreign key.
    sensorReading_device = models.ForeignKey(
        Device,
        on_delete=models.CASCADE,
    )
view.py
class SensorReadingViewSet(viewsets.ModelViewSet):
    """
    API endpoint that allows SensorReading to be viewed or edited.

    Additionally exposes ``post_readings_zip`` for uploading many readings as
    a zip archive of CSV files.
    """

    serializer_class = SensorReadingSerializer
    queryset = SensorReading.objects.all()
    parser_classes = (MultiPartParser, FormParser,)

    @staticmethod
    def _bad_request(message):
        """Build a 400 response whose HTTP status matches its JSON body."""
        return JsonResponse(
            {'status': status.HTTP_400_BAD_REQUEST, 'message': message},
            status=status.HTTP_400_BAD_REQUEST,
        )

    @staticmethod
    def _rows_from_zip(archive, device_id):
        """Convert every line of every archive member into serializer input.

        Args:
            archive: An open ``zipfile.ZipFile``.
            device_id: Value stored under ``'sensorReading_device'`` for
                every produced row.

        Returns:
            A list of dicts shaped like
            ``{'payload': {'data': <line>}, 'sensorReading_device': device_id}``
            with trailing whitespace stripped from each line.

        Raises:
            UnicodeDecodeError: If a member is not valid UTF-8.
        """
        import io

        rows = []
        for member_name in archive.namelist():
            # Decode lazily instead of reading each member fully into memory.
            # ``newline='\n'`` keeps line splitting on "\n" only.
            with io.TextIOWrapper(
                archive.open(member_name), encoding='utf-8', newline='\n'
            ) as text:
                rows.extend(
                    {
                        'payload': {'data': line.rstrip()},
                        'sensorReading_device': device_id,
                    }
                    for line in text
                )
        return rows

    # NOTE(review): ``list_route`` was superseded by ``action(detail=False)``
    # in later Django REST framework releases -- confirm the installed version.
    @list_route(methods=['post'])
    def post_readings_zip(self, request):
        """Bulk-create readings from an uploaded zip of CSV files.

        Expects a multipart request with:
            * ``zipped_file``: the zip archive; every line of every member
              becomes one reading with payload ``{'data': <line>}``.
            * ``device_id``: primary key of an existing ``Device``.

        Returns:
            * 201 with ``{'status': <serialized readings>}`` on success.
            * 400 with the serializer errors when validation fails.
            * 400 with ``{'status': 400, 'message': ...}`` when the upload is
              missing, is not a readable zip, is not UTF-8 text, or the
              device is unknown. These are sent with a real HTTP 400 status
              rather than 200.
        """
        import logging

        logger = logging.getLogger(__name__)

        # NOTE(review): this changes the process-wide warning filter on every
        # request; kept for backward compatibility -- consider configuring
        # warnings once at start-up instead.
        if not sys.warnoptions:
            warnings.simplefilter("ignore")

        # ``.get`` avoids an unhandled KeyError (HTTP 500) on missing parts.
        request_file = request.FILES.get('zipped_file')
        device_id = request.data.get('device_id')

        if request_file is None:
            return self._bad_request('Bad Request: zipped_file is required')

        if device_id is None or not Device.objects.filter(device_id=device_id).exists():
            return self._bad_request('Device ID does not exist, please register')
        logger.debug('device exist')

        if not zipfile.is_zipfile(request_file):
            return self._bad_request('Bad Request: Uploaded file is not a zip')
        logger.debug('file is zip')

        logger.debug('Processing data')
        try:
            # The context manager guarantees the archive is closed.
            with zipfile.ZipFile(request_file) as archive:
                data = self._rows_from_zip(archive, device_id)
        except zipfile.BadZipFile:
            # is_zipfile() only checks the magic number; the archive itself
            # can still be corrupt.
            return self._bad_request('Bad Request: Uploaded file is not a zip')
        except UnicodeDecodeError:
            return self._bad_request('Bad Request: CSV files must be UTF-8 encoded')
        logger.debug('Done processing csvs: %d rows', len(data))

        serializer = SensorReadingSerializer(data=data, many=True)
        logger.debug('Validating serializer')
        if not serializer.is_valid():
            return JsonResponse(serializer.errors, status=400, safe=False)

        logger.debug('Saving')
        serializer.save()
        logger.debug('Done')
        return JsonResponse({'status': serializer.data}, status=201, safe=False)