将路径数组转换为数据结构

时间:2020-08-10 20:31:45

标签: javascript arrays algorithm

我有一系列这样的路径:

/doc/data/main.js
/doc/data/xl.js
/doc/data/dandu/sdasa.js
/mnt/data/la.js

我正在尝试构建以下结构:

{
  "directories": {
    "/doc/data": {
      "directories": {
        "dandu": {
          "files": {
            "sdasa.js": 1
          }
        }
      },
      "files": {
        "main.js": 1,
        "xl.js": 1
      }
    },
    "/mnt/data": {
      "directories": {},
      "files": {
        "la.js": 1
      }
    }
  },
  "files": {}
}

请忽略该示例中文件的值。将来,我将为此分配更复杂的数据。当前值为1。

在之前的一个帖子中,我发现可以使用以下函数得到类似的结果:

// Builds a nested object tree from the outer-scope `paths` array: every
// non-empty '/'-separated segment (directory names and the file name alike)
// becomes a key whose value is the object holding the next level.
var parsePathArray = function() {
    var tree = {};
    for (var fullPath of paths) {
        // Leading, trailing or doubled slashes yield empty segments; drop them.
        var segments = fullPath.split('/').filter(function(segment) {
            return segment !== "";
        });
        var node = tree;
        segments.forEach(function(segment) {
            if (node[segment] === undefined) {
                node[segment] = {};
            }
            // Descend so the next segment is attached one level deeper.
            node = node[segment];
        });
    }
    return tree;
};

该解决方案的主要问题是它会拆分每个目录。但是我不想拆分每个目录,而是只获取至少包含一个文件的目录。例如,在我的示例中,/doc没有文件(只有一个目录/data),因此不为它单独创建节点,而是继续向下处理。我尝试修改了这个函数,但是没有成功:

// Attempted variation from the question: join consecutive leading segments
// into one combined key (`str`) instead of creating one level per segment.
// NOTE(review): `split` and `this.files` are defined outside this fragment.
var str = '';
for (var j = 0; j < split.length; j++) {
    // Not the last segment and no entry keyed by the current `str` yet:
    // append this segment to the key and move on to the next one.
    if (j < split.length - 1 && typeof this.files[str] === 'undefined') {
        str += '/' + split[j];
        continue;
    }
    // Otherwise descend into the entry keyed by `str`, creating it if needed.
    // `str` is never reset after descending.
    if (str !== '') {
        if (typeof this.files[str] === 'undefined')
            this.files[str] = {};
        this.files = this.files[str];
    }
}

将这些字符串转换为该数据结构的最佳方法是什么?

3 个答案:

答案 0 :(得分:1)

这是我想出的解决方案。它的工作方式是逐段构建路径,并将其与现有数据结构进行比较。它还应该能处理不在任何目录中的单独文件,因为您的原始帖子似乎暗示这是必要的。最后,我决定将其拆分为两个函数,因为这可能使解释更容易。

代码:

// Sample input: a mix of paths with and without a leading slash, at varying
// depths, plus one bare file name ('thing.js') that has no directory at all.
const paths = [
    '/doc/data/main.js',
    'doc/data/xl.js',
    '/etc/further/owy.js',
    '/etc/further/abc.js',
    'etc/mma.js',
    '/mnt/data/it.js',
    '/mnt/data/path/is/long/la.js',
    'mnt/data/path/is/la.js',
    '/doc/data/dandu/sdasa.js',
    '/etc/i/j/k/l/thing.js',
    '/etc/i/j/areallylongname.js',
    'thing.js'
];

/**
 * Builds a `{ directories, files }` tree from a list of file paths.
 *
 * Each path is given a leading slash if it lacks one, the paths are processed
 * from fewest to most '/'-separated parts, and every file is registered with
 * the value 1 on the directory object that `findDirectory` returns for it.
 *
 * @param {string[]} paths - File paths; the input array is not modified.
 * @returns {{directories: Object, files: Object}} The assembled structure.
 */
function buildStructure(paths) {
    const structure = { directories: {}, files: {} };
    const partCount = (path) => path.split('/').length;

    const normalised = [...paths].map(
        (path) => (path.charAt(0) === '/' ? path : `/${path}`)
    );
    // Shallow paths first, so parent directories exist before deeper ones.
    normalised.sort((a, b) => partCount(a) - partCount(b));

    for (const path of normalised) {
        const nodes = path.split('/').slice(1);
        const file = nodes.pop();

        // A path with no directory part places its file on the structure root.
        const start = nodes[0] ? structure.directories : structure;
        const target = findDirectory(start, '', [...nodes]);

        target.files = { ...(target.files || {}), [file]: 1 };
    }

    return structure;
}

/**
 * Walks `nodes` from `pointer`, returning the object that represents the
 * directory the nodes describe, creating it when it does not exist yet.
 *
 * Nodes are appended one at a time to a pending key. Whenever the pending key
 * already exists on the current object, the walk steps into that entry (and
 * into its `directories` map if nodes remain) and starts a fresh key.
 *
 * @param {Object} pointer - Object to start searching on.
 * @param {string} subPath - Key accumulated so far ('' to start fresh).
 * @param {string[]} nodes - Remaining directory names; consumed (emptied) here.
 * @returns {Object} The object for the located or newly created directory.
 */
function findDirectory(pointer, subPath, nodes) {
    let cursor = pointer;
    let pendingKey = subPath;

    while (nodes.length > 0) {
        const candidate = `${pendingKey}/${nodes.shift()}`;
        const existing = cursor[candidate];

        if (!existing) {
            // No such entry yet: keep growing the key with the next node.
            pendingKey = candidate;
            continue;
        }

        cursor = existing;
        if (nodes.length >= 1) {
            // More nodes follow, so they belong to this entry's sub-directories.
            cursor.directories = cursor.directories || {};
            cursor = cursor.directories;
        }
        pendingKey = '';
    }

    if (pendingKey) {
        // Leftover key means the directory is new: create and enter it.
        cursor[pendingKey] = {};
        cursor = cursor[pendingKey];
    }
    return cursor;
}

// Build the tree from the sample `paths` above and print it.
const structure = buildStructure(paths);
console.log(structure);
.as-console-wrapper { min-height: 100%!important; top: 0; }

说明:

这比我开始研究时想象的要复杂得多(并且有趣得多)。一旦开始连接目录,操作顺序就很重要。

从buildStructure开始,我们先对路径数组进行映射,为所有没有前导斜杠的条目补上斜杠。然后,根据它们引用的目录数对它们进行排序。这样可以确保我们从结构的顶部向底部进行工作。

将每个路径分成节点数组,然后弹出文件字符串。你剩下这样的东西了:

const nodes = ['doc', 'data'];
const file = 'main.js';

现在,我们必须把这些节点传给findDirectory,以查找/创建文件的位置。变量pointer用于跟踪我们在structure对象中的位置;由于pointer与structure中的对应对象是同一个引用,因此我们通过pointer所做的任何更改都会反映在structure中。

findDirectory函数以递归方式处理每个节点,以逐渐建立完整的路径。每当我们拼出的路径已经存在于structure的目录中时,我们就进入该目录,并重新开始构建路径,以尝试找到下一级目录。如果找不到,那么这就是一个全新的目录。目标是在函数退出时,pointer始终位于正确的目录中——必要时会沿途创建该目录。

为简化起见,假设我们只有两条记录路径:

const paths = [
  'doc/data/main.js',
  'doc/data/dandu/sdasa.js'
];

对于第一个路径,findDirectory将进行三遍。这些是每次通过时都会赋予它的参数:

pointer = structure.directories > same > same

subPath = '' > '/doc' > '/doc/data'

nodes = ['doc', 'data'] > ['data'] > []

我们从未找到匹配项,因此函数退出时,它将在structure.directories上创建该目录。现在,第二条路径将经过四遍:

pointer = 
  structure.directories > 
  same > 
  structure.directories./doc/data.directories > 
  same

subPath = '' > '/doc' > '' > '/dandu' 

nodes = ['doc', 'data', 'dandu'] > ['data', 'dandu'] > ['dandu'] > []

如您所见,在第二遍中,我们拼出了字符串/doc/data,它确实存在于structure.directories上。因此,我们进入其中;由于还有节点要处理,我们在其中创建了一个新的directories对象,并且也进入了该对象。如果没有更多要处理的节点,就说明我们已经到达了正确的层级,也就不必创建它。从这里开始,只需再次构建路径并重复该过程。

一旦我们位于正确的目录中,就可以将文件直接放在指针所指的对象上,它也就被登记到结构中了。当我们移至下一条路径时,指针将再次指向structure.directories。

如果没有要处理的节点(仅文件名),则将整个结构对象传递给findDirectory,文件将进入对象的顶层。


希望这能把问题解释清楚,并对您有用。我很享受解决这个问题的过程,也欢迎任何关于如何改进它的建议。

答案 1 :(得分:0)

这个挑战确实不是那么容易。尽管如此,下面的方法把问题分解为若干易于阅读和理解、因而也易于维护的子任务,从而达到OP的目标...

// Sample input: unsorted paths with and without a leading slash, including
// root-level files, several files per directory, and nested directories.
const pathList = [
  '/doc/data/main.js',
  '/doc/data/fame.js',
  '/doc/data/fame.es',
  '/doc/data/xl.js',
  '/doc/data/dandu/sdasa.js',

  '/mnt/data/la.js',
  '/mnt/la.es',

  'foo/bar/baz/biz/foo.js',
  'foo/bar/baz/biz/bar.js',
  '/foo/bar.js',
  '/foo/bar/baz/foo.js',
  'foo/bar/baz/bar.js',
  'foo/bar/baz/biz.js',

  '/foobar.js',
  'bazbiz.js',

  '/etc/further/owy.js',
  '/etc/further/abc.js',
  'etc/mma.js',
  '/etc/i/j/k/l/thing.js',
  '/etc/i/j/areallylongname.js'
];


/**
 * Splits a path into its directory part and its file name.
 *
 * Any run of leading slashes is removed first; the remainder is cut at its
 * last slash. A path without a slash yields an empty `pathName`.
 *
 * @param {string} path - Path such as '/doc/data/main.js' or 'bazbiz.js'.
 * @returns {{pathName: string, fileName: string}} The separated parts.
 */
function createSeparatedPathAndFileData(path) {
  const normalizedPath = path.replace(/^\/+/, '');
  const lastSlashIndex = normalizedPath.lastIndexOf('/');

  if (lastSlashIndex === -1) {
    // No directory part at all: the whole string is the file name.
    return { pathName: '', fileName: normalizedPath };
  }
  return {
    pathName: normalizedPath.slice(0, lastSlashIndex),
    fileName: normalizedPath.slice(lastSlashIndex + 1),
  };
}

/**
 * Sort comparator for `{ pathName, fileName }` items.
 *
 * Orders by `pathName`, then by file name without its extension, then by
 * extension. A file name without a dot is treated as having an empty
 * extension, so it compares cleanly against (and sorts before) the same
 * name with an extension instead of throwing.
 *
 * @param {{pathName: string, fileName: string}} a - First item.
 * @param {{pathName: string, fileName: string}} b - Second item.
 * @returns {number} Negative, zero or positive, as `Array.prototype.sort` expects.
 */
function compareByPathAndFileNameAndExtension(a, b) {
  const regXSplit = (/\.([^.]*)$/); // split for filename and captured file extension.

  // The split yields a single element when there is no dot, hence the defaults.
  const [aName, aExtension = ''] = a.fileName.split(regXSplit);
  const [bName, bExtension = ''] = b.fileName.split(regXSplit);

  return (
       a.pathName.localeCompare(b.pathName)
    || aName.localeCompare(bName)
    || aExtension.localeCompare(bExtension)
  );
}


/**
 * Returns the part of `pathName` that follows `root` plus a slash.
 *
 * Only a prefix match counts. Unlike a split-based check, a `root/` sequence
 * that occurs again later in `pathName` (e.g. root 'a', path 'a/a/b') does
 * not prevent the match.
 *
 * @param {string} root - Candidate parent path, without a trailing slash.
 * @param {string} pathName - Path to test against `root`.
 * @returns {?string} The remainder after `root/`, or null if `pathName`
 *   does not start with `root/`.
 */
function getRightPathPartial(root, pathName) {
  const prefix = `${ root }/`;

  return pathName.startsWith(prefix)
    ? pathName.slice(prefix.length)
    : null;
}

/**
 * Derives the partial list for `pathName` by walking back through the
 * previous item's partials until a shorter joined prefix matches.
 *
 * Trailing partials are dropped one at a time; after each drop the remaining
 * ones are joined with '/' and tried as a root for `pathName`. If nothing is
 * left, `pathName` itself becomes the only partial.
 *
 * @param {string[]} previousPartials - Partials of the preceding item (not modified).
 * @param {string} pathName - Directory path of the current item.
 * @returns {string[]} Partials for the current item.
 */
function getPathPartials(previousPartials, pathName) {
  const ancestors = Array.from(previousPartials);
  let remainder;

  while (!remainder) {
    const dropped = ancestors.pop();
    // Stop on an exhausted list or an empty (falsy) partial.
    if (!dropped || ancestors.length < 1) {
      break;
    }
    remainder = getRightPathPartial(ancestors.join('/'), pathName);
  }

  if (ancestors.length === 0) {
    return [pathName];
  }
  return remainder ? ancestors.concat(remainder) : ancestors;
}

/**
 * `Array.prototype.map` callback that attaches a `pathPartials` list to
 * `fileData`, derived from the item directly before it in `list`.
 *
 * - No previous item: the list is just the item's own `pathName`.
 * - Same `pathName` as the previous item: a copy of the previous list.
 * - Previous `pathName` is a direct prefix: previous list plus the remainder.
 * - Otherwise: delegate to `getPathPartials` to walk back to a shared root.
 *
 * @param {Object} fileData - Current item; receives the `pathPartials` property.
 * @param {number} idx - Index of `fileData` within `list`.
 * @param {Object[]} list - The list being mapped; earlier items already carry `pathPartials`.
 * @returns {Object} The same `fileData` object.
 */
function createPathPartialDataFromCurrentAndPreviousItem(fileData, idx, list) {
  const predecessor = list[idx - 1];

  if (!predecessor) {
    // First item: initialize partials with the full path name.
    fileData.pathPartials = [fileData.pathName];
    return fileData;
  }

  const predecessorPath = predecessor.pathName;
  const ownPath = fileData.pathName;

  if (predecessorPath === ownPath) {
    // Same directory: copy, so the two items do not share one array.
    fileData.pathPartials = [].concat(predecessor.pathPartials);
    return fileData;
  }

  // Try an instant prefix match first, then fall back to walking back
  // through the predecessor's partials.
  const remainder = getRightPathPartial(predecessorPath, ownPath);
  fileData.pathPartials = remainder
    ? predecessor.pathPartials.concat(remainder)
    : getPathPartials(predecessor.pathPartials, ownPath);

  return fileData;
}


/**
 * Tells whether `index` has no own enumerable string-keyed properties yet.
 *
 * @param {Object} index - Object to inspect.
 * @returns {boolean} True when `Object.keys(index)` is empty.
 */
function isUnassignedIndex(index) {
  const ownKeys = Object.keys(index);
  return ownKeys.length < 1;
}
/**
 * Gives `index` fresh, empty `directories` and `files` maps, replacing any
 * existing values under those two keys.
 *
 * @param {Object} index - Object to initialize in place.
 * @returns {Object} The same `index` object.
 */
function assignInitialIndexProperties(index) {
  const emptyBuckets = { directories: {}, files: {} };
  return Object.assign(index, emptyBuckets);
}

/**
 * `Array.prototype.reduce` callback that registers one file in `index`.
 *
 * Walks `fileData.pathPartials` from the top of `index`, creating any
 * missing `{ directories, files }` node on the way, then records the file
 * name with the value 1 in the `files` map of the node reached.
 *
 * Directory lookups use an own-property check, so a directory called e.g.
 * 'constructor' or 'toString' is not mistaken for an inherited member.
 * `fileData.pathPartials` is only read, never consumed.
 *
 * @param {Object} index - Accumulated index; initialized here when empty.
 * @param {{pathPartials: string[], fileName: string}} fileData - Item to add.
 * @returns {Object} The same `index` object.
 */
function assignFileDataToIndex(index, fileData) {
  if (isUnassignedIndex(index)) {
    assignInitialIndexProperties(index);
  }
  const { pathPartials, fileName } = fileData;

  let subIndex = index;

  for (const path of pathPartials) {
    if (!path) {
      // An empty partial marks a root-level file: stop descending.
      break;
    }
    const { directories } = subIndex;

    if (Object.prototype.hasOwnProperty.call(directories, path)) {
      subIndex = directories[path];
    } else {
      subIndex = directories[path] = assignInitialIndexProperties({});
    }
  }
  subIndex.files[fileName] = 1;

  return index;
}


// Demonstration: log the input and then the result after each successive
// pipeline stage. The commented-out calls show the stages not yet applied
// in that particular log statement.

// Stage 0: the raw input list.
console.log(
  'input :: path list ...',
  pathList
  //.map(createSeparatedPathAndFileData)
  //.sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex, {})
);
// Stage 1: split every path into `pathName` and `fileName`.
console.log(
  '1st :: create separated path and file data from the original list ...',
  pathList
    .map(createSeparatedPathAndFileData)
  //.sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex, {})
);
// Stage 2: sort the separated items.
console.log(
  '2nd :: sort previous data by comparing path- and file-names and its extensions ...',
  pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex, {})
);
// Stage 3: attach `pathPartials` to every sorted item.
console.log(
  '3rd :: create partial path data from current/previous items of the sorted list ...',
  pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
    .map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex, {})
);
// Stage 4: reduce the items into the final index object.
console.log(
  '4th :: output :: assemble final index from before created list of partial path data ...',
  pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
    .map(createPathPartialDataFromCurrentAndPreviousItem)
    .reduce(assignFileDataToIndex, {})
);
.as-console-wrapper { min-height: 100%!important; top: 0; }

...从上面的日志中可以看到,这些任务是...

清理和(重新)构建/映射

  1. 通过删除可能的前导斜杠序列来对每个路径进行清理/标准化。
  2. 构建一个文件数据项列表,其中每个数据项都包含相应路径项的pathName和fileName(路径项采用其已清理/规范化的形式)。

例如'/doc/data/dandu/sdasa.js'被映射到...

{
  "pathName": "doc/data/dandu",
  "fileName": "sdasa.js"
}

排序

通过比较以下两个当前映射文件数据项的属性来完成排序...

  1. 比较pathName
  2. fileName比较,不带扩展名
  3. 按文件扩展名比较

因此原始文件列表看起来像这样...

[
  '/doc/data/main.js',
  '/doc/data/fame.js',
  '/doc/data/fame.es',
  '/doc/data/dandu/sdasa.js',
  'foo/bar/baz/biz/bar.js',
  '/foo/bar.js',
  'foo/bar/baz/biz.js',
  '/foobar.js'
]

...(经过清理/规范化的映射并)排序为类似下面的内容...

[{
  "pathName": "",
  "fileName": "foobar.js"
}, {
  "pathName": "doc/data",
  "fileName": "fame.es"
}, {
  "pathName": "doc/data",
  "fileName": "fame.js"
}, {
  "pathName": "doc/data",
  "fileName": "main.js"
}, {
  "pathName": "doc/data/dandu",
  "fileName": "sdasa.js"
}, {
  "pathName": "foo",
  "fileName": "bar.js"
}, {
  "pathName": "foo/bar/baz",
  "fileName": "biz.js"
}, {
  "pathName": "foo/bar/baz/biz",
  "fileName": "bar.js"
}]

排序是必不可少的,因为紧随其后的算法依赖于排序整齐的pathName。

路径部分的分割和聚类

为了使此任务保持“简单(dumb)”,它是通过映射过程完成的,该过程不仅使用当前处理的项,还使用该项的前一个同级项(即前驱)。

通过用前一项的pathName拆分当前项的pathName,将额外构建一个pathPartials列表。

例如'foo/bar/baz'将用前一项的'foo'进行拆分(通过正则表达式)。因此,'bar/baz'已经是一个聚合好的部分路径;把它与前一个同级项的pathPartials列表(此时为['foo'])连接起来,就得到当前文件数据项的pathPartials列表。因此结果将是['foo', 'bar/baz']。

'foo/bar/baz/biz'也会发生同样的情况,先前的路径名是'foo/bar/baz',而先前的部分列表是['foo', 'bar/baz']。拆分结果将为'biz',新的部分列表将为['foo', 'bar/baz', 'biz']

从上方排序的文件数据列表然后映射到该新列表中...

[{
  "pathName": "",
  "fileName": "foobar.js",
  "pathPartials": [
    ""
  ]
}, {
  "pathName": "doc/data",
  "fileName": "fame.es",
  "pathPartials": [
    "doc/data"
  ]
}, {
  "pathName": "doc/data",
  "fileName": "fame.js",
  "pathPartials": [
    "doc/data"
  ]
}, {
  "pathName": "doc/data",
  "fileName": "main.js",
  "pathPartials": [
    "doc/data"
  ]
}, {
  "pathName": "doc/data/dandu",
  "fileName": "sdasa.js",
  "pathPartials": [
    "doc/data",
    "dandu"
  ]
}, {
  "pathName": "foo",
  "fileName": "bar.js",
  "pathPartials": [
    "foo"
  ]
}, {
  "pathName": "foo/bar/baz",
  "fileName": "biz.js",
  "pathPartials": [
    "foo",
    "bar/baz"
  ]
}, {
  "pathName": "foo/bar/baz/biz",
  "fileName": "bar.js",
  "pathPartials": [
    "foo",
    "bar/baz",
    "biz"
  ]
}]

组装最终索引

最后一步是一个简单的列表精简任务,因为在这一点上,已经完成了正确分割和聚类每个路径部分的最困难的部分。

答案 2 :(得分:-1)

您可以使用某种递归函数来完成它。请记住,这只是一种可能的解决方案,可能不是最佳解决方案。

/**
 * Inserts one file path into `structure` and returns the structure.
 *
 * Keys of `structure` are joined folder paths. The shortest leading portion
 * of the path's folders that already exists as a key decides what happens:
 * - none exists: the full folder path becomes a key holding the file
 *   (an entry already stored under that key, e.g. '' for root-level files,
 *   is kept rather than replaced);
 * - the whole folder path exists: the file is added to that entry;
 * - only a leading part exists: the rest is inserted recursively into that
 *   entry's `directories` structure.
 *
 * @param {string} path - File path, folders separated by '/'.
 * @param {Object} [structure] - Structure to extend; a new one is created when falsy.
 * @returns {Object} The extended structure (the same object when one was passed).
 */
const workPath = (path, structure) => {
    // Own-property lookup, so folder names such as 'constructor' are not
    // confused with members inherited from Object.prototype.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const tree = structure || {};

    const folders = path.split("/");
    const file = folders.pop();

    // Check whether any leading portion of the folder path is already a key.
    let breakPoint = null;
    let tempPath;
    for (let i = 0; i < folders.length; i++) {
        tempPath = folders.slice(0, i + 1).join("/");

        if (hasOwn(tree, tempPath)) {
            breakPoint = i;
            break;
        }
    }

    if (breakPoint === null) {
        // No leading portion exists: register the full folder path.
        const foldersPath = folders.join("/");
        if (!hasOwn(tree, foldersPath)) {
            tree[foldersPath] = {};
        }
        const entry = tree[foldersPath];
        entry.files = entry.files || {};
        entry.files[file] = 1;
    } else if (breakPoint === folders.length - 1) {
        // The existing key is the entire folder path (this includes index 0
        // for single-folder paths): just add the file.
        const entry = tree[tempPath];
        entry.files = entry.files || {};
        entry.files[file] = 1;
    } else {
        // Only part of the path exists: recurse with the remaining portion.
        const subPath = [...folders.slice(breakPoint + 1), file].join("/");

        tree[tempPath].directories = workPath(subPath, tree[tempPath].directories);
    }

    return tree;
};

/**
 * Folds every path of `array` into one structure, starting from an empty
 * object and passing each path together with the structure built so far
 * to `workPath`.
 *
 * @param {Iterable<string>} array - The file paths to convert.
 * @returns {Object} The structure returned by the last `workPath` call,
 *   or an empty object when there are no paths.
 */
const convert = array => [...array].reduce(
    (structure, path) => workPath(path, structure),
    {}
);

convert函数需要一个包含所有路径的数组。

请记住,此解决方案不会考虑其中没有文件的条目。