从大型对象数组(约 5 万条记录)中检索对象的最快方法

时间:2016-02-11 15:13:24

标签: arrays object search find underscore.js

totalObjects 包含大约 4 万条记录,我需要在其中查找同时存在于 sapObjs(约 2 万条记录)中的对象。找到对象后,我会修改它的属性值。整个操作耗时超过 200 秒。我已经尝试过用 filter 等其他数组函数来替代 _.find,但结果发现 Underscore.js 反而比它们更快。不过据说 Underscore 本身速度较慢,所以我想换一种方式来实现。有人能帮我解决这个问题并提高性能吗?

        // Merge every sapObjs record into totalObjects: records whose name is already
        // tracked in entriesInserted update the matching totalObjects entry, all
        // others are appended and registered.
        for (i = 0,sapLength = sapObjs.length;i < sapLength; i++) {

            // entriesInserted is indexed by record name.
            currRecord = entriesInserted[sapObjs[i].name];
            if (currRecord) {

                // Linear scan of totalObjects for every already-known record; this is
                // the lookup the question is about.
                dummyObj = _.find(totalObjects,{name:sapObjs[i].name});

                dummyObj["sap_desc"] = sapObjs[i].sap_desc;
                dummyObj.source = "";

            }
            else {
                // Register the new record under its name (was the undefined
                // identifier `sapOpbs`, which throws a ReferenceError).
                entriesInserted[sapObjs[i].name] = sapObjs[i];

                totalObjects.push(sapObjs[i]);

            }
        }

1 个答案:

答案 0 :(得分:0)

创建一个查找哈希表。下面是我临时拼凑出来、用于演示的一个简单页面。使用 _.find 时,测试耗时超过 400 秒;改用查找哈希表后,大约只需 40 毫秒,几乎是瞬间完成。

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
    <!-- Explicit https: protocol-relative URLs resolve to file:// when the page is opened from disk. -->
    <script src="https://code.jquery.com/jquery-2.2.0.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js"></script>

    <script type="text/javascript">

        // Page-level fixture arrays: kept global so the fill step and the timing
        // step can each be invoked separately from the console.
        var source_sapObjs = [],
            source_totalObjects = [];

        /**
         * Downloads a plain-text word list (one word per line) and fills the global
         * fixture arrays from it: up to 20,000 records in source_sapObjs and up to
         * 40,000 records in source_totalObjects, both taken from the start of the list.
         *
         * The arrays are emptied first, so calling this again replaces the fixtures
         * instead of appending duplicates. The request is asynchronous; the arrays
         * are ready once "finished creating arrays" has been logged.
         */
        function createArrays(){
            var SAP_COUNT = 20000;
            var TOTAL_COUNT = 40000;

            jQuery.ajax({
                url: 'https://raw.githubusercontent.com/dwyl/english-words/master/words.txt',
                dataType: 'text',
                type: 'GET'
            }).done(function(textFile){

                // Accept LF and CRLF line endings and drop blank lines (e.g. the
                // empty string left behind by a trailing newline).
                var words = textFile.split(/\r?\n/).filter(function(word){
                    return word !== '';
                });

                // Empty the arrays in place so repeated calls do not accumulate records.
                source_sapObjs.length = 0;
                source_totalObjects.length = 0;

                // Never index past the end of the list: that would create records
                // whose name is undefined.
                var sapCount = Math.min(SAP_COUNT, words.length);
                var totalCount = Math.min(TOTAL_COUNT, words.length);

                for(var i = 0; i < sapCount; i++){
                    source_sapObjs.push({name: words[i], sap_desc: 'DESC-' + words[i]});
                }
                for(var j = 0; j < totalCount; j++){
                    source_totalObjects.push({name: words[j], sap_desc: 'none'});
                }

                console.log("source_sapObjs.length:", source_sapObjs.length);
                console.log("source_totalObjects.length:", source_totalObjects.length);
                console.log("finished creating arrays");
            }).fail(function(jqXHR, textStatus, errorThrown){
                // Surface download problems instead of leaving the arrays silently empty.
                console.error("failed to load word list:", textStatus, errorThrown);
            });
        }

        /**
         * Copies sap_desc from every source_sapObjs record onto the source_totalObjects
         * record with the same name and clears that record's source field, then logs
         * the elapsed time and the resulting sap_desc values.
         *
         * Matching goes through a name -> record lookup table built in one pass,
         * replacing a linear _.find over totalObjects for every sapObjs record.
         *
         * slice() makes shallow copies only: the record objects are shared with the
         * source arrays and are updated in place.
         */
        function iter_arrays(){
            var sapObjs = source_sapObjs.slice();
            var totalObjects = source_totalObjects.slice();

            var start = new Date().getTime();

            // Prototype-less object, so names such as "constructor" or "toString"
            // can never resolve to something inherited from Object.prototype.
            var totalObjectsHash = Object.create(null);
            for(var k = 0; k < totalObjects.length; k++){
                var key = totalObjects[k].name;
                // Keep the first record per name, which is what _.find would return.
                if (!(key in totalObjectsHash)) {
                    totalObjectsHash[key] = totalObjects[k];
                }
            }

            var unmatched = 0;
            for (var i = 0, sapLength = sapObjs.length; i < sapLength; i++) {
                var match = totalObjectsHash[sapObjs[i].name];
                if (!match) {
                    // No record with this name: skip it rather than throw on undefined.
                    unmatched++;
                    continue;
                }
                match.sap_desc = sapObjs[i].sap_desc;
                match.source = "";
            }

            var end = new Date().getTime();
            var time = end - start;
            console.log('Execution time: ' + time);
            if (unmatched > 0) {
                console.log('sapObjs records without a match: ' + unmatched);
            }
            console.log(_.map(totalObjects, 'sap_desc'));

        }

    </script>

</head>
<body>
</body>
</html>

要运行它,请在本地计算机上创建该页面并用 Chrome 打开,然后打开开发者工具,在控制台中运行第一个函数 createArrays()。等它完成后(控制台会输出 "finished creating arrays"),再运行第二个函数 iter_arrays()。