Django:使用查询集/过滤创建类别

时间:2018-02-08 16:50:58

标签: python django csv django-models django-mptt

我试图弄清楚是否可以使用自定义过滤器创建类别。

我正在构建一个电子商务应用程序,我已经使用mptt设置了我的类别模型。我正在导入一个csv,它创建我的顶级类别,工作正常。问题是我需要具有更具体的子类别,例如男士服装(顶级)>牛仔裤。

csv有几个字段,其中包含与每种产品相关的信息,例如:" stone wash bootcut jeans"。我希望检查这些字段中的关键字,并将每个产品添加到正确的类别。是否可以通过这种方式设置类别,还是有替代解决方案?

我是django新手,所以感谢任何帮助。

models.py

from django.db import models
from mptt.models import MPTTModel, TreeForeignKey

class Category(MPTTModel):
    """A node in the product category tree, managed by django-mptt.

    ``name`` is unique across all categories, and ``slug`` must be unique
    among the categories that share the same ``parent``.
    """
    name = models.CharField(max_length=50, unique=True)
    parent = TreeForeignKey('self', null=True, blank=True, related_name='children', db_index=True, on_delete=models.CASCADE)
    slug = models.SlugField()

    class MPTTMeta:
        order_insertion_by = ['name']

    class Meta:
        # Written as an explicit tuple of tuples: one constraint over
        # (parent, slug).
        unique_together = (('parent', 'slug'),)
        verbose_name_plural = 'categories'

    def get_slug_list(self):
        """Return the cumulative slug paths leading to this category.

        The slugs of this category and its ancestors (in the order
        ``get_ancestors(include_self=True)`` yields them) are joined with
        ``/`` one prefix at a time, e.g. ``['mens', 'mens/jeans']``.

        Returns:
            list[str]: one path per ancestor level; an empty list when the
            ancestors cannot be looked up.
        """
        try:
            ancestors = self.get_ancestors(include_self=True)
        except Exception:
            # Best effort, as before: a failed ancestor lookup yields no
            # slugs. Unlike a bare ``except``, this no longer swallows
            # KeyboardInterrupt / SystemExit.
            return []

        ancestor_slugs = [node.slug for node in ancestors]
        return [
            '/'.join(ancestor_slugs[:depth])
            for depth in range(1, len(ancestor_slugs) + 1)
        ]

    def __str__(self):
        return self.name

class Brands(models.Model):
    """A product brand, identified only by its display name."""

    brand_name = models.CharField(default='', max_length=500)

    def __str__(self):
        # The brand name doubles as the human-readable representation.
        label = self.brand_name
        return label


class Product(models.Model):
    """A single product row imported from the merchant CSV feed.

    Every product references one ``Brands`` row and, optionally, one
    ``Category`` node of the MPTT category tree.
    """
    aw_deep_link = models.CharField(max_length=500, default='')
    description = models.CharField(max_length=500, default='')
    product_name = models.CharField(max_length=500, default='')
    aw_image_url = models.CharField(max_length=500, default='')
    search_price = models.DecimalField(max_digits=6, decimal_places=2, null=True)
    merchant_name = models.CharField(max_length=500, default='')
    display_price = models.CharField(max_length=500, default='')
    # Brands is a plain model, not an MPTT tree, so a regular ForeignKey is
    # used here. TreeForeignKey is meant for MPTT targets: its tree-aware
    # form field relies on MPTT metadata that Brands does not have.
    brand_name = models.ForeignKey('Brands', on_delete=models.CASCADE)
    colour = models.CharField(max_length=500, default='')
    rrp_price = models.DecimalField(max_digits=6, decimal_places=2, null=True)
    # Category is an MPTTModel, so the tree-aware foreign key is appropriate.
    category = TreeForeignKey('Category', null=True, blank=True, on_delete=models.CASCADE)
    slug = models.SlugField(default='')

    def __str__(self):
        return self.product_name

importCSV.py

import re
from products.models import Category, Brands
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command that imports products from a CSV file.

    The first row of the file is treated as a header and skipped. Each
    following row must provide at least eleven columns, in this order:
    aw_deep_link, description, product_name, aw_image_url, search_price,
    merchant_name, display_price, brand name, colour, rrp_price, category
    name.
    """
    help = "Load some sample data into the db"

    def add_arguments(self, parser):
        """Register the ``--file`` option naming the CSV file to import."""
        parser.add_argument('--file', dest='file', help='File to load')

    def handle(self, **options):
        """Create one ``Product`` per CSV row, creating brands and categories on demand.

        Does nothing when no ``--file`` option was given.

        Raises:
            IndexError: if a non-empty row has fewer than eleven columns.
        """
        import csv

        from products.models import Product

        if options['file']:
            print("Importing " + options['file'])

            # newline='' lets the csv module do its own line-ending handling,
            # which is required for quoted fields containing line breaks.
            with open(options['file'], newline='') as f:
                # csv.reader honours quoted fields (e.g. descriptions that
                # contain commas) and strips the line terminator, so the last
                # column no longer carries a trailing newline.
                reader = csv.reader(f)
                next(reader, None)  # skip the header row; tolerate an empty file

                linecount = 0
                for fields in reader:
                    if not fields:
                        # Blank line in the file: nothing to import.
                        continue
                    linecount += 1
                    category, _ = Category.objects.get_or_create(name=fields[10])
                    brand, _ = Brands.objects.get_or_create(brand_name=fields[7])

                    data = {
                        'aw_deep_link': fields[0],
                        'description': fields[1],
                        'product_name': fields[2],
                        'aw_image_url': fields[3],
                        # The price columns are nullable decimals; an empty
                        # CSV cell is stored as NULL instead of failing
                        # decimal conversion on save.
                        'search_price': fields[4] or None,
                        'merchant_name': fields[5],
                        'display_price': fields[6],
                        'brand_name': brand,
                        'colour': fields[8],
                        'rrp_price': fields[9] or None,
                        'category': category,
                    }

                    product = Product(**data)
                    product.save()

                print("Added {0} products".format(linecount))

1 个答案:

答案 0 :(得分:1)

所以你有

  • 可能包含多个关键字的手动预定义子类别
  • 每个产品的多个文本字段,可确保任何关键字至少出现一次

从这个设置开始,我首先尝试通过正则表达式来概括每个子类别的“搜索术语”,具体取决于识别子类别所需的条件的复杂性。很可能一个同义词列表已经足够了。将此类字段添加到Category模型(此处为正则表达式解决方案):

class Category(MPTTModel):
    # Pattern used to recognise this subcategory in a product's text fields.
    # Kept as MPTTModel (as in the question's models.py) because the matching
    # loop relies on the tree method is_leaf_node().
    regex = models.CharField(max_length=100, blank=True)  # only needed for subcategories (top level from csv)
    ...

对于同一个示例 trainers / runners(据我所知,这些是复数形式,因此等同于 trainer 或 runner 出现在任何位置),对应的正则表达式为 r'trainers|runners'(见下方代码片段)。

这是你需要手动定义的部分 - 我不羡慕你所涉及的繁琐工作;)

之后,您的导入循环需要在此处进行一些更改:

r'trainers|runners'

在这里

def handle(self, **options):
    # Excerpt: only the start of the command's handle() is shown here.
    from products.models import Product, Category
    all_categories = list(Category.objects.all())
    # Converted to a list so the queryset is evaluated once here and is not
    # queried again inside the per-row loop below.

完整

                data = ...
                # Look for the first leaf category whose regex matches one of
                # the product's text fields; 'description' is scanned before
                # 'product_name'.
                subcat = None
                for textfield in ('description', 'product_name'):
                    # I suppose these are the two relevant fields to scan?
                    text = data[textfield]
                    for cat in all_categories:
                        if not cat.regex:
                            # A blank pattern matches every string, so a
                            # category without a regex (e.g. a top-level one
                            # created from the csv) must never be picked here.
                            continue
                        # Only consider nodes that have no children; this
                        # check runs first so non-leaf nodes skip the regex.
                        if cat.is_leaf_node() and re.search(cat.regex, text) is not None:
                            subcat = cat
                            break
                    if subcat is not None:
                        break
                # subcat is now the first matching subcategory, or None when
                # nothing matched, in which case the category stays unchanged.
                if subcat is not None:
                    data['category'] = subcat


                product = Product(**data)