给定一段评论文本和一组关键字,从该段落中找出包含所有关键字的最短片段。如果有数百万条评论,你会做哪些预处理? 第一部分很简单,就是最小窗口问题。对于预处理,我使用倒排索引:对每条评论,我都会构建一张表,存储每个单词出现位置的列表。当查询到来时,我取出每个关键字对应的位置列表。那么,有没有办法在O(n)时间内从这组列表中求出最小窗口长度?我尝试构建最小堆和最大堆来存储每个列表的当前位置,并用两个堆的根来跟踪最小窗口长度。然后我执行extractMin操作,并从最大堆中删除同一个元素。为了能在最大堆中定位每个元素(以便删除),我维护了一个哈希表。接着,从被提取元素所属的列表中取出下一个元素插入堆中,并根据需要更新窗口长度。这需要O(n log n)时间。是否可以在O(n)时间内完成?
答案 0 :(得分:1)
假设合并后的位置列表已经排好序,下面是我的做法:
答案 1 :(得分:0)
#include<bits/stdc++.h>
using namespace std;
// Keyword table shared with main(): each lowercased keyword maps to the number
// of times it currently occurs inside the sliding window (0 = not in the window).
map<string,int>word;
/**
 * Lowercases `str` in place, byte by byte, using the current C locale.
 *
 * @param str string to convert; modified in place.
 */
void functionlower(std::string& str){
    // std::tolower is only defined for values representable as unsigned char
    // (or EOF), so each byte is widened through unsigned char first. Passing a
    // plain, possibly negative, char is undefined behaviour.
    std::transform(str.begin(), str.end(), str.begin(),
                   [](unsigned char ch){ return static_cast<char>(std::tolower(ch)); });
}
/**
 * Returns a lowercased copy of `str` (byte by byte, current C locale), used as
 * the case-insensitive lookup key for a word.
 *
 * @param str word to normalise; left untouched.
 * @return    a new string of the same length with every byte lowercased.
 */
std::string compareWord(const std::string& str){
    std::string lowered(str.size(), '\0');
    // Widen through unsigned char: std::tolower on a negative char is undefined.
    std::transform(str.begin(), str.end(), lowered.begin(),
                   [](unsigned char ch){ return static_cast<char>(std::tolower(ch)); });
    return lowered;
}
/**
 * Reads keywords and one line of text from standard input and prints the
 * shortest run of consecutive words of that line that contains every keyword.
 *
 * Input: an integer k, then k whitespace-separated keywords. Exactly one
 * character after the last keyword is skipped, and the rest of that line is
 * taken as the text. The text is split on single spaces; words and keywords
 * are compared case-insensitively.
 *
 * Output: the words of the best window, each followed by one space, then a
 * newline. Only the newline is printed when there are no keywords or when no
 * window contains all of them.
 *
 * @return 0 on success, 1 when the keyword count cannot be read.
 */
int main(){
    int total_word = 0;
    if (!(std::cin >> total_word)) {
        return 1;
    }
    for (int i = 0; i < total_word; ++i) {
        std::string keyword;
        if (!(std::cin >> keyword)) {
            break;  // fewer keywords supplied than announced
        }
        functionlower(keyword);
        word.insert({keyword, 0});
    }
    std::cin.ignore();

    std::string line;
    std::getline(std::cin, line);

    // Split on single spaces; consecutive spaces yield empty tokens, and the
    // vector always holds at least one (possibly empty) token.
    std::vector<std::string> para;
    std::size_t tokenStart = 0;
    for (std::size_t i = 0; i <= line.size(); ++i) {
        if (i == line.size() || line[i] == ' ') {
            para.push_back(line.substr(tokenStart, i - tokenStart));
            tokenStart = i + 1;
        }
    }

    // With no keywords the "all keywords covered" condition is trivially true
    // and the shrink loop below would never hit a keyword to stop on, walking
    // past the end of `para`. The answer is the empty snippet.
    if (word.empty()) {
        std::cout << std::endl;
        return 0;
    }

    const int needed = static_cast<int>(word.size());
    const int wordCount = static_cast<int>(para.size());
    int covered = 0;    // distinct keywords present in [left, right]
    int left = 0;       // left edge of the current window
    int bestSpan = -1;  // right - left of the best window so far, -1 if none
    int bestLeft = -1;  // left edge of the best window so far

    for (int right = 0; right < wordCount; ++right) {
        const auto entering = word.find(compareWord(para[right]));
        if (entering != word.end()) {
            if (entering->second == 0) {
                ++covered;
            }
            ++entering->second;
        }

        // While every keyword is covered, record the window and try to drop
        // words from the left. Stop at a keyword that occurs exactly once:
        // removing it would uncover that keyword.
        while (covered >= needed) {
            if (bestSpan == -1 || right - left < bestSpan) {
                bestSpan = right - left;
                bestLeft = left;
            }
            const auto leaving = word.find(compareWord(para[left]));
            if (leaving != word.end()) {
                if (leaving->second == 1) {
                    break;
                }
                --leaving->second;
            }
            ++left;
        }
    }

    for (int i = 0; i <= bestSpan; ++i) {
        std::cout << para[bestLeft + i] << " ";
    }
    std::cout << std::endl;
    return 0;
}
时间复杂度为O(n log k),其中n是段落中的单词数,k是需要搜索的关键字数。
答案 2 :(得分:0)
假设wordLength为常数,则可以以O(n)时间复杂度实现此解决方案,其中n是para中的单词数;这是在Java中实现的代码:
package Basic.MinSnippetWithAllKeywords;
import java.util.*;
/**
* Given a review paragraph and keywords,
* find minimum length snippet from paragraph which contains all keywords in any order.
*/
public class Solution {
public String minSnippet(String para, Set<String> keywords) {
LinkedList<Integer> deque = new LinkedList<>();
String[] words = para.split("\\s");
for (int i = 0; i < words.length; ++i) {
if(keywords.contains(words[i]))
deque.offer(i);
}
while(deque.size() > 1) {
int first = deque.pollFirst();
int second = deque.peekFirst();
if (words[first] != words[second]) {
deque.offerFirst(first);
break;
}
}
while(deque.size() > 1) {
int first = deque.pollLast();
int second = deque.peekLast();
if(words[first] != words[second]) {
deque.offerLast(first);
break;
}
}
if (deque.isEmpty())
return "";
return String.join(" ",
Arrays.copyOfRange(words, deque.peekFirst(), deque.peekLast() + 1));
}
/*
Example:
my name is shubham mishra
is name
*/
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
String para = sc.nextLine();
String keyLine = sc.nextLine();
Set<String> keywords = new HashSet<>();
keywords.addAll(Arrays.asList(keyLine.split("\\s")));
System.out.println(new Solution().minSnippet(para, keywords));
}
}