在 MongoDB 查询中同时使用 find 和 sort

时间:2013-08-06 12:51:12

标签: xml mongodb pymongo

我正在尝试使用 pymongo 对 MongoDB 运行查询,并将每条结果写成单独的 XML 文件。当我只使用排序而不添加任何查找条件时,查询可以正常工作。我想要做的是能够插入各种查找条件,并让脚本据此写出 XML。使用下面这一行时,脚本可以正常运行:

def find_id_sort():
    for post in db.video_md_fcsvr.find({}, ).sort("_id", ):

但是,如果我尝试添加这样的查找条件:

def find_id_sort():
    for post in db.video_md_fcsvr.find({}, {"streams.codec_name": "prores"}).sort("_id", ):

结果如下:

KeyError: 'format' —— 出现在 python_write_xml_toFile.py 第 43 行的函数 find_id_sort 中:format_data = post['format']

完整的脚本在这里:

import sys
import os
import xml.etree.cElementTree as ET
import pymongo
from pymongo import MongoClient
from bson import Binary, Code
from bson.json_util import dumps  
import io, json
from itertools import groupby
from bson.objectid import ObjectId
import datetime


# Open a client against the MongoDB server at localhost:27017.
# NOTE(review): slave_okay=True presumably permits reads from secondary
# members - confirm against the installed pymongo version.
connection = MongoClient("localhost:27017", slave_okay=True)
# Select the 'video_ingest_db' database; find_id_sort() reads from it via `db`.
db = connection['video_ingest_db']    


def find_id_sort():
    """Write one XML metadata file per document of db.video_md_fcsvr.

    Iterates over the cursor returned by find(...).sort("_id", ), reads the
    '_id', 'format' and 'streams' fields of every document, prints a few of
    them, and writes the collected values as attributes of an XML tree to
    /Users/mathiesj/Desktop/metadata/<ObjectId>.xml.

    Each document is expected to carry a 'format' sub-document (with
    'format_name', 'format_long_name' and 'filename') and at least two
    entries in 'streams'; a missing key or stream raises KeyError/IndexError
    because the fields are read by direct indexing.

    NOTE(review): in the find() call below the dict
    {"streams.codec_name": "prores"} is passed as the SECOND positional
    argument. pymongo treats the first argument as the query criteria and the
    second as the projection (the fields to return), so this does not filter
    on the codec name. The returned documents then lack 'format', which
    matches the reported KeyError: 'format'. To filter, the condition would
    have to be the first argument.

    The print statements use Python 2 syntax.
    """
    # NOTE(review): first argument {} matches every document; the second
    # argument is a projection, not a filter (see docstring).
    for post in db.video_md_fcsvr.find({}, {"streams.codec_name": "prores"}).sort("_id", ): # find all entries (with filename - add: "({}, {'format.filename':1})" and sort using _id 
        # GRAB VALUES FROM FIELDS --------------------------------------------------
        video_id_timestamp = post['_id'].generation_time # get the timestamp off the Object ID
        video_id = post['_id'] # get the ObjectId
        streams_data = post['streams']
        format_data = post['format']
        format_name = post['format']['format_name']
        format_long_name = post['format']['format_long_name']
        path_filename = post['format']['filename']

        # Only the first two streams are read; a third stream is ignored.
        codec_name_0 = post['streams'][0]['codec_name']
        codec_name_1 = post['streams'][1]['codec_name']
        #codec_name_2 = post['streams'][2]['codec_name']

        codec_type_0 = post['streams'][0]['codec_type']
        codec_type_1 = post['streams'][1]['codec_type']
        #codec_type_2 = post['streams'][2]['codec_type']

        # Frame rate and dimensions are taken from stream 0.
        # NOTE(review): presumably stream 0 is the video stream - confirm.
        frame_rate = post['streams'][0]['avg_frame_rate']
        video_height = post['streams'][0]['height'] 
        video_width = post['streams'][0]['width'] 

        # Progress output for each processed document.
        #print "codec_name: %s" % streams_data_codec_name
        print "format name: %s " % format_name
        #print "Streams data: %s" % streams_data
        print "format data: %s" % format_data
        print "Video ID: %s" % video_id
        print "Creation Time: %s" % video_id_timestamp
        #print "Metadata: %s" % post

        # CONVERT OUT TO STRING -------------------------------------------------- 
        # The values are set as XML attributes below, so they are converted
        # to str first.
        id_to_string = str(video_id) # convert the ObjectId to string
        timestamp_to_string = str(video_id_timestamp) 
        filename_to_string = str(path_filename)
        format_name_to_string = str(format_name)
        format_long_name_to_string = str(format_long_name)

        codec_name_0_to_str = str(codec_name_0)
        codec_name_1_to_str = str(codec_name_1)
        #codec_name_2_to_str = str(codec_name_2)

        codec_type_0_to_str = str(codec_type_0)
        codec_type_1_to_str = str(codec_type_1)
        #codec_type_2_to_str = str(codec_type_2)

        frame_rate_to_str = str(frame_rate)
        video_height_to_str = str(video_height)
        video_width_to_str = str(video_width)

        metadata_file_name = "/Users/mathiesj/Desktop/metadata/" + id_to_string + ".xml" # create the path and filenaming convention of the metadata files


        # WRITE VALUES TO XML FORMAT --------------------------------------------------
        # Layout: <video_metadata><metadata> ...one child element per group of
        # values, the values themselves stored as attributes... </metadata></video_metadata>
        root = ET.Element("video_metadata")
        metadata = ET.SubElement(root, "metadata")
        #streams = ET.SubElement(root, "streams")

        # The element text values ("some value1" ... "some value6") are
        # fixed placeholder strings; the document data lives in the attributes.
        mongodb_id_field = ET.SubElement(metadata, "mongodb_id")
        mongodb_id_field.set("id", id_to_string)
        mongodb_id_field.text = "some value1" 

        creation_time_field = ET.SubElement(metadata, "creation_time")
        creation_time_field.set("time_stamp", timestamp_to_string)
        creation_time_field.text = "some value2"

        filename_field = ET.SubElement(metadata, "path_filename")
        filename_field.set("path_filename", filename_to_string)
        filename_field.text = "some value3"

        video_format_field = ET.SubElement(metadata, "video_format")
        video_format_field.set("video_format_name", format_name_to_string)
        video_format_field.set("video_format_long_name", format_long_name_to_string)
        video_format_field.text = "some value4"

        stream0_field = ET.SubElement(metadata, "stream_0")
        stream0_field.set("codec_type", codec_type_0_to_str)
        stream0_field.set("codec_name", codec_name_0_to_str)
        stream0_field.set("frame_rate", frame_rate_to_str)
        stream0_field.set("video_height", video_height_to_str)
        stream0_field.set("video_width", video_width_to_str)
        stream0_field.text = "some value5"

        stream1_field = ET.SubElement(metadata, "stream_1")
        stream1_field.set("codec_type", codec_type_1_to_str)
        stream1_field.set("codec_name", codec_name_1_to_str)
        stream1_field.text = "some value6"

        #stream2_field = ET.SubElement(technical_metadata, "stream 2")
        #stream2_field.set("codec_type", codec_type_2_to_str)
        #stream2_field.set("codec_name", codec_name_2_to_str)
        #stream2_field.text = "some value4"


        # Serialize the tree to the per-document file built above.
        tree = ET.ElementTree(root)
        tree.write(metadata_file_name)




# Run the export as soon as the script is executed.
find_id_sort()

1 个答案:

答案 0 :(得分:2)

您的查询难道不应该是这样吗:

db.video_md_fcsvr.find({"streams.codec": "prores"}).sort( "_id", 1 ):

第一个参数是查询条件(criteria),第二个参数是投影(projection),用于选择要返回的字段。