如何按域和目录对排序列表中的URL进行分组?
如果两个URL的域名相同,并且第一级目录(即域名后的第一段路径)也相同,则应将它们分到同一个数组中;
域名相同、但第一级目录各不相同(各自独占一个目录)的URL,应合并到同一个数组中;
例如,此列表中的网址:
// Sample input: a sorted list of URLs spanning two domains.
var url_list = [
    "https://www.facebook.com/impression.php/f2e61d9df/?lid=115",
    "https://www.facebook.com/plugins/like.php?app_id=5",
    "https://www.facebook.com/tr/a/?id=228037074239568",
    "https://www.facebook.com/tr/b/?ev=ViewContent",
    "http://www.marvel.com/abc?f=33",
    "http://www.marvel.com/games?a=11",
    "http://www.marvel.com/games?z=22",
    "http://www.marvel.com/videos"
];
应分组如下:
// Expected grouping: one array per shared "domain + first directory";
// URLs that are alone in their directory share one array per domain.
var group_url = [
    [
        "https://www.facebook.com/impression.php/f2e61d9df/?lid=115",
        "https://www.facebook.com/plugins/like.php?app_id=5"
    ],
    [
        "https://www.facebook.com/tr/a/?id=228037074239568",
        "https://www.facebook.com/tr/b/?ev=ViewContent"
    ],
    [
        "http://www.marvel.com/abc?f=33",
        "http://www.marvel.com/videos"
    ],
    [
        "http://www.marvel.com/games?a=11",
        "http://www.marvel.com/games?z=22"
    ]
];
我写了一些代码但只设法按域分组网址:
// Groups consecutive URLs of a sorted list by domain only (the first
// directory is not taken into account). Each consumed entry of `url_list`
// is overwritten with "" so the outer loop skips it afterwards.
var group_url = [];
var count = 0;
var url_list = ["https://www.facebook.com/impression.php/f2e61d9df/?lid=115",
    "https://www.facebook.com/plugins/like.php?app_id=5",
    "https://www.facebook.com/tr/?id=228037074239568",
    "https://www.facebook.com/tr/?ev=ViewContent",
    "http://www.marvel.com/abc?f=33",
    "http://www.marvel.com/games?a=11",
    "http://www.marvel.com/games?z=22",
    "http://www.marvel.com/videos"];
// `i` is declared with `var`: assigning it undeclared creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
for (var i = 0; i < url_list.length; i++) {
    if (url_list[i] !== "") {
        // Strip the protocol, then keep everything before the first "/".
        var current = url_list[i].replace(/.*?:\/\//g, "");
        var slash = current.indexOf('/');
        // Without a "/" the whole remainder is the domain; an empty `check`
        // would match every URL in the list.
        var check = slash === -1 ? current : current.slice(0, slash);
        group_url.push([]);
        for (var j = i; j < url_list.length; j++) {
            var add_url = url_list[j];
            if (add_url.indexOf(check) !== -1) {
                group_url[count].push(add_url);
                url_list[j] = "";
            } else {
                // The list is sorted, so the first mismatch ends this domain.
                break;
            }
        }
        count += 1;
    }
}
console.log(JSON.stringify(group_url));
答案 0 :(得分:2)
您似乎希望按"域名+目录"对URL进行分组;但如果某个URL在其分组中是唯一的成员,则改为仅按域名把这些URL重新归为一组。
为此您可以使用此脚本(ES5):
var url_list = ["https://www.facebook.com/impression.php/f2e61d9df/?lid=115",
    "https://www.facebook.com/plugins/like.php?app_id=5",
    "https://www.facebook.com/tr/a/?id=228037074239568",
    "https://www.facebook.com/tr/b/?ev=ViewContent",
    "http://www.marvel.com/abc?f=33",
    "http://www.marvel.com/games?a=11",
    "http://www.marvel.com/games?z=22",
    "http://www.marvel.com/videos"];

/**
 * Groups URLs by "domain/first directory". URLs that end up alone in their
 * domain+dir group are regrouped together under their domain.
 *
 * @param {string[]} urls - URLs to group (the protocol is ignored for keys).
 * @returns {string[][]} One array of URLs per group.
 */
function groupUrlsByDomainAndDir(urls) {
    // Group the URLs, keyed by "domain/dir"
    var hash = urls.reduce(function (acc, url) {
        // Ignore the protocol, then capture the domain and the optional first
        // dir. Every part of the pattern is optional, so it matches any
        // string: a URL without a path ("http://x.com") gets the key "x.com/"
        // instead of causing a null dereference.
        var parts = url.replace(/^.*?:\/\//, '').match(/^([^\/?#]*)(?:\/([^\/?#]*))?/);
        var domAndDir = parts[1] + '/' + (parts[2] || '');
        acc[domAndDir] = (acc[domAndDir] || []).concat(url);
        return acc;
    }, {});
    // Regroup URLs by domain only, when they are alone for their domain+dir
    Object.keys(hash).forEach(function (domAndDir) {
        if (hash[domAndDir].length !== 1) {
            return;
        }
        var domain = domAndDir.slice(0, domAndDir.indexOf('/') + 1);
        // A root URL is already keyed by its domain. Moving it would append
        // the entry to itself and then delete it, losing the URL.
        if (domain === domAndDir) {
            return;
        }
        hash[domain] = (hash[domain] || []).concat(hash[domAndDir]);
        delete hash[domAndDir];
    });
    // Convert hash to array
    return Object.keys(hash).map(function (key) {
        return hash[key];
    });
}

var result = groupUrlsByDomainAndDir(url_list);
// Output result
console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }
注意:由于您在评论中提到了ES5,我没有使用ES6;不过建议考虑使用ES6的Map
来实现这样的哈希表,它更适合这项工作。
答案 1 :(得分:0)
_renderItem: function (ul, item) {
var t = '',
result = '';
searchedText = this.term;
$.each(this.options.columns, function (index, column) {
console.log(item);
var reg = new RegExp('(' +searchedText+ ')', 'gi');
t += '<span style="padding:0 4px;float:left;width:' + column.width + ';">' + item[column.valueField ? column.valueField : index].replace(reg, "<span style='color: red'>$1</span>") + '</span>'
});
result = $('<li></li>')
.data('ui-autocomplete-item', item)
.append('<a class="mcacAnchor">' + t + '<div style="clear: both;"></div></a>')
.appendTo(ul);
return result;
}
像这样使用:
// Remove the protocol and split the remainder on "/": [domain, firstDir, ...].
// Declared with `var`; the undeclared assignment created an implicit global.
var urllistextended = url_list.map(function (el) {
    return el.split("://")[1].split("/");
});
// obj[domain][firstDir] -> array of the original URLs in that directory
var obj = {};
// Indexed loop instead of for...in, which also visits inherited enumerable
// properties and yields string keys.
for (var index = 0; index < urllistextended.length; index++) {
    var el = urllistextended[index];
    // Drop any query string / fragment from the domain and directory segments
    // so that "games?a=11" and "games?z=22" land in the same "games" bucket.
    var domain = el[0].split(/[?#]/)[0];
    var dir = (el[1] || "").split(/[?#]/)[0];
    obj[domain] = obj[domain] || {};
    obj[domain][dir] = obj[domain][dir] || [];
    obj[domain][dir].push(url_list[index]);
}
答案 2 :(得分:0)
我建议使用优秀的URI.js库,它提供了解析,查询和操作网址的绝佳方法:http://medialize.github.io/URI.js/
例如,要使用路径(您所指的目录),您可以轻松地执行以下操作(直接从api文档中获取):
// Examples of reading and writing the path of a URI object.
// NOTE(review): `URI` is not defined in this snippet; presumably it is the
// constructor provided by the URI.js library linked above.
var uri = new URI("http://example.org/foo/hello.html");
// Getter: call pathname() without arguments
uri.pathname(); // returns string "/foo/hello.html"
// Setter: call pathname() with the new path
uri.pathname("/foo/hello.html"); // returns the URI instance for chaining
// The setter encodes the value for you
uri.pathname("/hello world/");
uri.pathname() === "/hello%20world/";
// Passing true to the getter decodes the value for you
uri.pathname(true) === "/hello world/";
// path() returns an empty string for empty paths, but "/" when a host is present:
URI("").path() === "";
URI("/").path() === "/";
URI("http://example.org").path() === "/";
其余的应该很容易。
答案 3 :(得分:0)
我建议使用一个对象,先按域名分组,再按域名后的第一段路径分组;然后遍历这棵树,并将其归约(reduce)为所需的结构。
此解决方案同样适用于未排序的数据。
var url_list = [
        "https://www.facebook.com/impression.php/f2e61d9df/?lid=115",
        "https://www.facebook.com/plugins/like.php?app_id=5",
        "https://www.facebook.com/tr/a/?id=228037074239568",
        "https://www.facebook.com/tr/b/?ev=ViewContent",
        "http://www.marvel.com/abc?f=33",
        "http://www.marvel.com/games?a=11",
        "http://www.marvel.com/games?z=22",
        "http://www.marvel.com/videos"
    ],
    temp = [],
    result;

// Step 1: build a two-level tree, domain -> first path segment -> URLs.
// Each node stores its ordered children in `_`; `temp` is the root's list of
// { name, children } entries, one per domain in order of first appearance.
url_list.forEach(function (url) {
    // keys = [domain, first path segment]
    var keys = url.match(/.*?:\/\/([^\/]+)\/?([^\/?]+)?/).slice(1);
    var node = this; // root node, supplied as forEach's thisArg
    keys.forEach(function (key) {
        if (!node[key]) {
            node[key] = { _: [] };
            node._.push({ name: key, children: node[key]._ });
        }
        node = node[key];
    });
    node._.push(url);
}, { _: temp });

// Step 2: flatten the tree. Per domain, URLs that are alone in their
// directory are collected into one shared array, placed before the arrays of
// the directories that hold several URLs.
result = [];
temp.forEach(function (domain) {
    var singles = [];
    var groups = [];
    domain.children.forEach(function (dir) {
        if (dir.children.length === 1) {
            singles.push(dir.children[0]);
        } else {
            groups.push(dir.children);
        }
    });
    if (singles.length) {
        result.push(singles);
    }
    groups.forEach(function (group) {
        result.push(group);
    });
});

console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }
答案 4 :(得分:0)
这完全符合您的需要:
var url_list = ["https://www.facebook.com/impression.php/f2e61d9df/?lid=115",
    "https://www.facebook.com/plugins/like.php?app_id=5",
    "https://www.facebook.com/tr/a/?id=228037074239568",
    "https://www.facebook.com/tr/b/?ev=ViewContent",
    "http://www.marvel.com/abc?f=33",
    "http://www.marvel.com/games?a=11",
    "http://www.marvel.com/games?z=22",
    "http://www.marvel.com/videos"];

// "<protocol>//<domain>/<first dir>" and "<protocol>//<domain>".
// No /g flag: a global regex keeps `lastIndex` state between exec() calls,
// which makes a shared literal unsafe to reuse.
var folderPattern = /.*\/\/[^\/]+\/[^\/\?]+/;
var domainPattern = /.*\/\/[^\/]+/;

// Returns the "protocol//domain/firstDir" key of a URL, or null when the URL
// has no first directory (e.g. "http://www.marvel.com/").
function folderKeyOf(url) {
    var match = folderPattern.exec(url);
    return match ? match[0] : null;
}

// Pass 1: group by domain + first directory.
var folderGroups = {};
for (var i = 0; i < url_list.length; i++) {
    var keyForUrl = folderKeyOf(url_list[i]);
    if (keyForUrl === null) {
        continue; // no directory: handled by the domain pass below
    }
    if (folderGroups[keyForUrl] == null) {
        folderGroups[keyForUrl] = [];
    }
    folderGroups[keyForUrl].push(url_list[i]);
}

// Drop the directories that hold a single URL; those URLs are regrouped by
// domain. Object.keys() returns a snapshot, so deleting while iterating is safe.
Object.keys(folderGroups).forEach(function (key) {
    if (folderGroups[key].length === 1) {
        delete folderGroups[key];
    }
});
//console.log(folderGroups);

// Pass 2: group every URL not kept in a folder group by its domain.
var domainGroups = {};
for (var j = 0; j < url_list.length; j++) {
    // Check if collected previously
    var folderKey = folderKeyOf(url_list[j]);
    if (folderKey !== null && folderGroups[folderKey] != null) {
        continue;
    }
    // Get for domain group; a string without "//" falls back to being its
    // own key so it is kept in the output rather than throwing.
    var domainMatch = domainPattern.exec(url_list[j]);
    var domainKey = domainMatch ? domainMatch[0] : url_list[j];
    if (domainGroups[domainKey] == null) {
        domainGroups[domainKey] = [];
    }
    domainGroups[domainKey].push(url_list[j]);
}
//console.log(domainGroups);

// Folder keys always contain "/<dir>" after the domain, so they cannot
// collide with domain keys. Object.assign replaces the jQuery $.extend call.
var finalResult = Object.assign({}, folderGroups, domainGroups);
console.log(Object.values(finalResult));
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>