我已经解决了问题,但无法提出通过所有测试用例的最有效问题。在5个测试用例中超时。
确定句子包含一个短语的所有单词
0:克里斯和詹妮弗今天早上吵架了
1:克里斯去度假了
2:珍妮弗在监狱里查询短语是
0:克里斯·詹妮弗
1:詹妮弗
2:监狱目标是为每个查询找到匹配语句的索引,如果不存在匹配语句,则为-1。单词顺序无关紧要。
输出:
0
0 2
2
即 第一个查询在句子0中具有匹配的单词,第二个在句子0和1中具有匹配的单词,依此类推。
约束
输入格式:
3
克里斯和詹妮弗今天早上吵架了
克里斯去度假了
珍妮弗在监狱里
3
克里斯·詹妮弗
珍妮弗
监狱
每个3代表句子或查询的数量。
以下是我尝试过的...
1。我的第一个解决方案:
let p =句子中最大的单词数
令k =查询中最大的单词数
大O是 O(npk)
public static void textQueries(List<String> sentences, List<String> queries) {
List<Map<String, Integer>> sentenceMaps = createMaps(sentences);
String results = queryMatcher(sentenceMaps, queries);
System.out.println(results);
}
private static String queryMatcher(List<Map<String, Integer>> sentenceMaps, List<String> queries) {
Map<String, Integer> wordCounter = new LinkedHashMap<>();
List<List<String>> results = new ArrayList<List<String>>();
for (String query : queries) {
List<String> result = new ArrayList<>();
for (int j = 0; j < sentenceMaps.size(); j++) {
if (isQueryFound(sentenceMaps.get(j), query, wordCounter)) {
result.add(j + "");
}
}
results.add(result);
}
return generateResultString(results);
}
/*
* StringBuilder used to reduce delays of calling multiple System.out.println();
*/
private static String generateResultString(List<List<String>> results) {
StringBuilder stringBuilder = new StringBuilder();
for (List<String> matchingSentenceIndexes : results) {
if (matchingSentenceIndexes.isEmpty()) {
stringBuilder.append("-1\n");
} else {
resultStringHelper(matchingSentenceIndexes, stringBuilder);
}
//stringBuilder.append("\n");
}
return stringBuilder.toString();
}
/*
* add " " for multiple indexes result
*/
private static void resultStringHelper(List<String> result, StringBuilder stringBuilder) {
for (int i = 0; i < result.size(); i++) {
stringBuilder.append(result.get(i));
if (i < result.size() - 1) {
stringBuilder.append(" ");
} else if (i == result.size() - 1) {
stringBuilder.append("\n");
}
}
}
private static boolean isQueryFound(Map<String, Integer> sentenceMap, String query, Map<String, Integer> wordCounter) {
String[] queryTokens = query.split(" ");
for (String queryToken : queryTokens) {
if (isMoreThan10Sentences(wordCounter, queryToken)) return false;
if (sentenceMap.containsKey(queryToken)) {
wordCounter.put(queryToken, wordCounter.getOrDefault(queryToken, 0) + 1);
} else {
return false;
}
}
return true;
}
private static boolean isMoreThan10Sentences(Map<String, Integer> wordCounter, String token) {
return wordCounter.getOrDefault(token, -1) > 10;
}
private static Map<String, Integer> initMap(String[] tokens) {
Map<String, Integer> map = new LinkedHashMap<>();
for (String token : tokens) {
map.put(token, 0);
}
return map;
}
private static List<Map<String, Integer>> createMaps(List<String> sentences) {
List<Map<String, Integer>> maps = new ArrayList<Map<String,Integer>>();
for (int i = 0; i < sentences.size(); i++) {
String[] tokens = sentences.get(i).split(" ");
maps.add(initMap(tokens));
}
return maps;
}
最近5个测试用例中的超时。
对于小型测试用例,其在线编码服务器上的基准如下:
地图创建时间:9.23954E-4
查询匹配时间:3.85751E-4
地图生成非常昂贵。
2。我的第二次尝试:
相似的逻辑,但应用了并发性,因为该平台最多支持2个线程。
多线程在这里完成:
1.句子->地图生成(并发地图生成)
2.查询匹配(并发匹配)
public static void textQueries(List<String> sentences, List<String> queries) {
List<Map<String, Integer>> sentenceMaps = createMaps(sentences);
startTime = System.nanoTime();
String results = queryMatcher(sentenceMaps, queries);
System.out.println(results);
private static String queryMatcher(List<Map<String, Integer>> sentenceMaps, List<String> queries) {
List<Future<String>> futures = new ArrayList<Future<String>>();
int threads = Runtime.getRuntime().availableProcessors();
ExecutorService executor = Executors.newFixedThreadPool(threads);
String[] results = new String[threads];
int length = queries.size() / threads;
for (int i = 0; i < threads; i++) {
int queryStart = length * i;
int queryEnd = length * (i+1);
if (i == threads -1 && queries.size() % threads != 0) queryEnd++;
Callable<String> worker = new QueryMatcher(sentenceMaps, queries, queryStart, queryEnd);
Future<String> submit = executor.submit(worker);
futures.add(submit);
}
for (int i = 0; i < futures.size(); i++) {
try {
results[i] = futures.get(i).get();
} catch (InterruptedException e) {
e.printStackTrace();
} catch (ExecutionException e) {
e.printStackTrace();
}
}
String returnString = concaString(results);
executor.shutdown();
return returnString;
}
private static String concaString(String[] results) {
StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < results.length; i++) {
stringBuilder.append(results[i]);
}
return stringBuilder.toString();
}
private static String generateResultString(List<List<String>> results) {
StringBuilder stringBuilder = new StringBuilder();
for (List<String> matchingSentenceIndexes : results) {
if (matchingSentenceIndexes.isEmpty()) {
stringBuilder.append("-1\n");
} else {
resultStringHelper(matchingSentenceIndexes, stringBuilder);
}
//stringBuilder.append("\n");
}
return stringBuilder.toString();
}
private static void resultStringHelper(List<String> result, StringBuilder stringBuilder) {
for (int i = 0; i < result.size(); i++) {
stringBuilder.append(result.get(i));
if (i < result.size() - 1) {
stringBuilder.append(" ");
} else if (i == result.size() - 1) {
stringBuilder.append("\n");
}
}
}
private static boolean isQueryFound(Map<String, Integer> sentenceMap, String query, Map<String, Integer> wordCounter) {
String[] queryTokens = query.split(" ");
for (String queryToken : queryTokens) {
if (isMoreThan10Sentences(wordCounter, queryToken)) return false;
if (sentenceMap.containsKey(queryToken)) {
wordCounter.put(queryToken, wordCounter.getOrDefault(queryToken, 0) + 1);
} else {
return false;
}
}
return true;
}
private static boolean isMoreThan10Sentences(Map<String, Integer> wordCounter, String token) {
return wordCounter.getOrDefault(token, -1) > 10;
}
private static boolean isQueryFound(Map<String, Integer> sentenceMap, String query) {
String[] queryTokens = query.split(" ");
//Map<String, Integer> duplicateChecker = new LinkedHashMap<String, Integer>();
for (String queryToken : queryTokens) {
if (sentenceMap.containsKey(queryToken)) {
//if (!duplicateChecker(duplicateChecker, sentenceMap, queryToken))
//return false;
} else {
return false;
}
}
return true;
}
/*
* this method checks for the case when there are duplicate words in query
* i.e. sentence containing 2 hello will return false of queries with 3 hello
*/
private static boolean duplicateChecker(Map<String, Integer> duplicateChecker, Map<String, Integer> sentenceMap, String queryToken) {
if (duplicateChecker.containsKey(queryToken)) {
if (duplicateChecker.get(queryToken) == 0) return false;
duplicateChecker.put(queryToken, duplicateChecker.get(queryToken) - 1);
} else {
duplicateChecker.put(queryToken, sentenceMap.get(queryToken) - 1);
}
return true;
}
private static List<Map<String, Integer>> createMaps(List<String> sentences) {
List<Map<String, Integer>> maps = new ArrayList<>();
int threads = Runtime.getRuntime().availableProcessors();
ExecutorService executor = Executors.newFixedThreadPool(threads);
List<Future<List<Map<String, Integer>>>> futures = new ArrayList<Future<List<Map<String, Integer>>>>();
int length = (sentences.size()) / threads;
for (int i = 0; i < threads; i++) {
int start = i * length;
int end = (i+1) * length;
if (i == threads - 1 && sentences.size() % threads != 0) end++;
List<String> splitSentence = new ArrayList(sentences.subList(start, end));
Callable<List<Map<String, Integer>>> worker = new MapMaker(splitSentence);
Future<List<Map<String, Integer>>> submit = executor.submit(worker);
futures.add(submit);
}
for (int i = 0; i < futures.size(); i++) {
try {
for (Map<String, Integer> map : futures.get(i).get()) {
maps.add(map);
}
} catch (InterruptedException e) {
e.printStackTrace();
} catch (ExecutionException e) {
e.printStackTrace();
}
}
executor.shutdown();
return maps;
}
private synchronized static Map<String, Integer> initMap(String[] tokens) {
Map<String, Integer> map = new LinkedHashMap<>();
for (String token : tokens) {
map.put(token, 0);
// map.put(token, map.getOrDefault(map.get(token), 1) + 1);
}
return map;
}
public static class MapMaker implements Callable<List<Map<String, Integer>>> {
private List<String> sentences;
@Override
public List<Map<String, Integer>> call() throws Exception {
List<Map<String, Integer>> maps = new ArrayList<Map<String,Integer>>();
for (int i = 0; i < sentences.size(); i++) {
String[] tokens = sentences.get(i).split(" ");
maps.add(initMap(tokens));
}
return maps;
}
public MapMaker(List<String> sentences) {
this.sentences = sentences;
}
}
public static class QueryMatcher implements Callable<String> {
private List<Map<String, Integer>> sentenceMaps;
private List<String> queries;
private int queryStart;
private int queryEnd;
@Override
public String call() throws Exception {
List<List<String>> results = new ArrayList<List<String>>();
for (int i = queryStart; i < queryEnd; i++) {
List<String> result = new ArrayList<>();
String query = queries.get(i);
for (int j = 0; j < sentenceMaps.size(); j++) {
if (isQueryFound(sentenceMaps.get(j), query)) {
result.add(j + "");
}
}
results.add(result);
}
return generateResultString(results);
}
public QueryMatcher(List<Map<String, Integer>> sentenceMaps, List<String> queries, int queryStart, int queryEnd) {
this.sentenceMaps = sentenceMaps;
this.queries = queries;
this.queryStart = queryStart;
this.queryEnd = queryEnd;
}
}
尽管我希望大型测试用例能够有所提速,但它仍然使5个测试用例超时。
对于小型测试用例,由于创建池的额外开销而增加了地图生成时间。
基准时间:
地图时间:0.007669489
查询匹配时间:3.22923E-4
3。我的第三个解决方案-用C ++编码以上代码
我质疑是否可能是Java导致超时。
该平台实际上为C ++提供了更短的计算时间,所以令我惊讶的是,它仍然给出了5个相同的超时时间。
4。我的第四种正则表达式,
我知道它会慢一些,但是我还是徒劳地尝试了。 Big O实际上在这里比较慢,因为我需要按单词对每个句子进行排序以避免n!正则表达式的排列...
public static void textQueries(List<String> sentences, List<String> queries) {
stringSort(sentences);
stringSort(queries);
StringBuilder stringBuilder = new StringBuilder();
boolean isExist = false;
for (int index = 0; index < queries.size(); index++) {
String query = queries.get(index);
isExist = false;
for (int i = 0; i < sentences.size(); i++) {
if (Matcher(buildNaturalLanguage(query), sentences.get(i))) {
stringBuilder.append(i + " ");
isExist = true;
}
}
if (!isExist) stringBuilder.append("-1");
if (index != queries.size() - 1) stringBuilder.append("\n");
}
System.out.println(stringBuilder.toString());
}
private static void stringSort(List<String> strings) {
for (int i = 0; i < strings.size(); ++i) {
String string = strings.get(i);
String[] stringParts = string.split(" ");
StringBuilder stringBuilder = new StringBuilder();
Arrays.sort(stringParts);
for (int j = 0; j < stringParts.length; j++) {
stringBuilder.append(stringParts[j] + " ");
}
strings.set(i, stringBuilder.toString()); // sure I made it back to string for code cleaness but you can return String[] for efficiency.. But only minor improvement.
}
}
private static String buildNaturalLanguage(String query) {
// System.out.println("query " + query);
String[] stringParts = query.split(" ");
String regular = "(([a-zA-Z])*(\\s))*";
for (String word : stringParts) {
regular += word + "(\\s(([a-zA-Z])*(\\s))*)";
}
return regular;
}
private static boolean Matcher(String regular, String sentence) {
Pattern p = Pattern.compile(regular);
Matcher m = p.matcher(sentence);
return m.find();
}
结果: 不仅会超时,而且还会以某种方式在另外两个未公开的测试用例上引起错误(错误答案)。我不知道为什么。
Ω(nm ^ 2 + plogp) ..假设正则表达式匹配为O(m)
我只能想到在运行主算法之前就过滤掉一些查询或句子的可能性吗? (约束:每个单词最多10个匹配项)。
这个约束检查部分仍然在我的第一个和第二个解决方案中实现。因此可能需要更智能的过滤。
我认为BCR-最好的比率是O(MNP),您仍然需要遍历每个查询和句子,并且如果不使用正则表达式也要拆分它们。
我在这里完全迷失了,如何才能真正提高速度呢?
非常感谢。
答案 0 :(得分:5)
维护一个HashMap
,它将String
映射到Set<Int>
。这个想法是要跟踪给定单词出现在哪些句子中。我们使用集合而不是数组来支持有效地计算两个集合的交集。
对于每个输入句子:
对于每个查询词组:
时间复杂度:考虑到每个句子中有10个单词,构建HashMap的成本为O(10N log N)。每个查询的成本为O(10 * log(N))。
答案 1 :(得分:1)
我有以下想法可能会加快速度,这似乎与Rishav提出的想法类似:
public static void main(String[] args) throws FileNotFoundException {
Scanner sc = new Scanner(new FileInputStream("file.txt"));
int numberOfSentences = Integer.parseInt(sc.nextLine());
Set<Integer> sentences = new HashSet<Integer>();
Map<String, Set<Integer>> words2Sentences = new HashMap<String, Set<Integer>>();
for (int i = 0; i < numberOfSentences; i++) {
String words[] = sc.nextLine().split(" ");
for (int j = 0; j < words.length; j++) {
if (!words2Sentences.containsKey(words[j])) {
words2Sentences.put(words[j], new HashSet<Integer>());
}
words2Sentences.get(words[j]).add(i);
}
sentences.add(i);
}
int numberOfPhrases = Integer.parseInt(sc.nextLine());
List<Set<Integer>> phraseResults = new ArrayList<Set<Integer>>();
for (int i = 0; i < numberOfPhrases; i++) {
Set<String> phrases = new HashSet<String>(Arrays.asList(sc.nextLine().split(" ")));
Set<Integer> result = new HashSet(sentences);
for (String s : phrases) {
result.retainAll(words2Sentences.get(s));
}
phraseResults.add(result);
}
for (Set<Integer> set : phraseResults) {
for (Integer i : set) {
System.out.print(i);
}
System.out.println();
}
}
答案 2 :(得分:0)
rank()
答案 3 :(得分:0)
这种方法应该可行。
#include <bits/stdc++.h>
using namespace std;
vector<set<int>> getres(vector<string> sentences, vector<string> phrases, vector<set<int>> v){
map<string,set<int>> m;
map<string,set<int>> :: iterator itr;
for(int i=0;i<sentences.size();i++){
string temp = sentences[i];
temp.push_back(' ');
string word = "";
for(int j=0;j<temp.length();j++){
if(temp[j] == ' '){
itr = m.find(word);
if(itr == m.end()){
set<int> s;
s.insert(i);
m.insert({word,s});
}
else if(itr != m.end()){
itr->second.insert(i);
}
word = "";
}
else{
word.push_back(temp[j]);
}
}
}
// for(itr = m.begin();itr!= m.end();itr++){
// cout<<itr->first <<" ";
// for(auto f= itr->second.begin();f!= itr->second.end();f++){
// cout<<*f<<" ";
// }
// cout<<endl;
// }
for(int i=0;i<phrases.size();i++){
string temp = phrases[i];
temp.push_back(' ');
string word = "";
int flag = 0;
set<int> s1,s2,s3;
for(int j=0;j<temp.length();j++){
if(temp[j] == ' '){
// cout<<"yes";
itr = m.find(word);
if(itr == m.end()){
flag = 1;
break;
}
else if(itr != m.end()){
if(s1.empty()){
s1 = itr->second;
}
else{
set_intersection(s1.begin(),s1.end(),itr->second.begin(),itr->second.end(),inserter(s3,s3.begin()));
s1 = s3;
s3.clear();
if(s1.empty()){
flag = 1;
break;
}
}
// for(auto f=s1.begin();f!= s1.end();f++){
// cout<<*f<<" ";
// }
// cout<<endl;
}
word = "";
}
else{
word.push_back(temp[j]);
}
}
if(flag == 1){
s1.clear();
s1.insert(-1);
v[i] = s1;
flag = 0 ;
}
else{
v[i] = s1;
}
s1.clear();
s2.clear();
s3.clear();
}
return v;
}
int main() {
vector<string> sentences = {"chris and jennifer had a fight this morning", "chris went on a holiday", "jennifer is in prison"};
vector<string> phrases = {"chris jennifer", "jennifer", "prison"};
vector<set<int>> v(phrases.size());
v = getres(sentences,phrases,v);
for(int i=0;i<v.size();i++){
set<int> :: iterator itr;
for(itr = v[i].begin() ;itr != v[i].end();itr++){
cout<<*itr<<" ";
}
cout<<endl;
}
// cout<<"finish"<<endl;
}