Question

我有一个C ++应用程序，从终端执行这个应用程序给出了完美的结果。它接受一个字符串（文本文件）和另一个字符串（查询）作为参数。但是当我执行相同的应用程序从python脚本传递相同的参数时，它只是不读取整个文本文件，从而提供不正确的输出。我已经使用ifstream从给定的参数文件中读取并处理了数据以插入到trie中（我不认为应用程序的详细信息在这里）。我可以看到，在从Python脚本执行应用程序的情况下，并未插入文本文件的所有单词。

subprocess.call([path_to_app , file parameter , query])
os.system(path_to_app + file paremeter + query)

我已经尝试了上述两个命令并且都给出了相同的结果（不正确）。

从终端和python脚本执行时显示每种情况下的输出

案例1：

Argument file received is /home/atul/DocumentSearchEngine/a1.txt
Arguemnt Query Received is data
6
here are the words Matching with query as prefix 
data database databases 
Total Score 6
Totalwords Inserted in the Trie are : 271

案例2：

Argument file received is /home/atul/DocumentSearchEngine/a1.txt
Arguemnt Query Received is data
0
here are the words Matching with query as prefix 

Total Score 0
Totalwords Inserted in the Trie are : 0

C ++应用程序代码

#include<bits/stdc++.h>

using namespace std;


struct Trie{
    int words;
    int prefixes;
    Trie* a[128];
};

void initialise(Trie *vertex){
    vertex->words = 0;
    vertex->prefixes =0;
    for(int i=0;i<128;i++){
        vertex->a[i] = NULL;
    }
}

int totalwords = 0;

void addWord(Trie *vertex , string word , int index){
    if(index == word.size()){
        vertex->words = vertex->words+1;
        vertex->prefixes = vertex->prefixes + 1;
        totalwords ++ ;
    }
    else{
        vertex->prefixes = vertex->prefixes + 1;
        int k = word[index];
        if(!vertex->a[k]){
            vertex->a[k] = new Trie();
            initialise(vertex->a[k]);
        }
        addWord(vertex->a[k],word,index+1);

    }
}

int countWords(Trie *vertex , string word, int index){
    int k = word[index];
    if(index == word.length()){
        return vertex->words;
    }
    else if(!vertex->a[k]){
        return 0;
    }
    else{
        return countWords(vertex->a[k],word,index+1);
    }
}

int countPrefixes(Trie *vertex , string prefix , int index){
    int k = prefix[index];
    if(index == prefix.length()){
        return vertex->prefixes;
    }
    else if(!vertex->a[k]){
        return 0;
    }
    else{
        return countPrefixes(vertex->a[k],prefix,index+1);
    }

}

void getWords(vector<string> &ans, Trie *vertex , string prefix){
    if(!vertex)
        return ;
    for(int i=0 ; i < 128 ;i++){
        if(vertex->a[i]){
            char x = (char)i;
            string dummy = prefix + x;
            if(vertex->a[i]->words){
                ans.push_back(dummy);
            }
            getWords(ans,vertex->a[i] , dummy);
        }
    }
}


void AllWords(Trie *vertex,  string prefix , int index ,vector<string> &ans){
    if(index == prefix.length()){
        if(vertex){
            if(vertex->words)
                ans.push_back(prefix);
            getWords(ans,vertex,prefix);
        }
    }
    else{
        int k = prefix[index];
        if(!vertex->a[k])
            return ;
        return AllWords(vertex->a[k] , prefix, index+1 , ans);
    }
}

int main(int argc , char *argv[]){

    string filename = argv[1];
    ifstream infile(filename);
    ofstream outfile;
    outfile.open("/home/atul/DocumentSearchEngine/result.txt" , ios_base::app);


    Trie *root = new Trie();
    initialise(root);
    string word;
    while(infile>>word){
        addWord(root,word,0);
    }

    string query=  argv[2];

    stringstream ss(query);

    cout<<"Argument file received is "<<filename<<endl;
    cout<<"Arguemnt Query Received is "<<query<<endl;

    double score =  0 ;
    int total_words = 0;
    while(getline(ss,word,' ')){
        total_words++;
        int x = countPrefixes(root,word,0);
        vector<string>ans ;
        cout<<x<<endl;
        score += x;
        cout<<"here are the words Matching with query as prefix \n";
        AllWords(root, word , 0 ,ans);
        for(auto x : ans){
            cout<<x<<" ";
        }
        cout<<endl;

    }
    score /= total_words;
    cout<<"Total Score "<<score<<endl;
    cout<<"Totalwords Inserted in the Trie are : "<<totalwords<<endl;
    outfile<<score<<endl;
}

Python脚本

infiles = ["a.txt" , "b.txt" , "c.txt" , "d.txt" ,"e.txt" , "f.txt" , "g.txt" , "h.txt" , "i.txt" , "j.txt" ]
outfiles = ["a1.txt" , "b1.txt" , "c1.txt" , "d1.txt" ,"e1.txt" , "f1.txt" , "g1.txt" , "h1.txt" , "i1.txt" , "j1.txt" ]

def Result(query):
    total_tries = 0
    for file in os.listdir("/home/atul/DocumentSearchEngine"):
        if file.endswith(".pdf"):
            print file
            command = "pdf2txt.py -o "+ infiles[total_tries] + " " + file
            print command
            os.system(command)
            f = open(infiles[total_tries] , "r")
            data = f.read().replace('\n',' ')
            data = dataProcssing(data)
            words = data.split()
            a = []
            for word in words:
                # print word
                if len(word)>=4:
                    a.append(word)
            output_file = open("/home/atul/DocumentSearchEngine/"+outfiles[total_tries],'w')
            for x in a:
                output_file.write(x)
                output_file.write(' ')
            curr = "/home/atul/DocumentSearchEngine/"+outfiles[total_tries]
            # print "sending file " + curr + "\n"
            # subprocess.call(['/home/atul/CODES/murguddin' , curr , query])
            command = "/home/atul/DocumentSearchEngine/./a.out "+curr+" "+query
            os.system(command)
            # print x
            # x = float(x)
            total_tries =  total_tries + 1
            # dic = {file : x}
            # score.update(dic)
    return total_tries

通过将pdf处理为文本

生成的示例输入文件

documentation splitsense introduction project solve problem splitting expenses among group friends feature simplifying debt which when used gives minimum number transactions among friends settle bills developed implemented greedy algorithm implement this feature used serve webpages dynamic data used mysql store transactions users while modelling every transaction directed graph what going first many users entered database every transactions among users there ways when single user pays when more than user pays form takes data from logged user about pays what uses what this data then processed stored text file edges with columns payer payee amount these edges stored databases need superior computation solve with multiple edges users where comes picture have used binary executable which computes required values transactions minimum transactions according algorithm mentioned below this writes output text file which later read file output shown user friendly html webpage thus this wrapped parts logic part other presentation part wrapped algorithm used simplifying debt feature uses greedy algorithm which goes like this maintain total worth each node amount given amount received received amount treated negative given amount treated positive involved transaction maintain priority queues nodes with total worth negative other total worth positive select element both priority queues check which smaller magnitude remove from their corresponding queues this removal represent transactions keep doing this till priority queues empty thus greedily select highest that what gives least number transactions clear debts conclusion this project just attempt solve simple problem splitting bills this project grown features where instead users database group users only involved transaction multiple challenges ahead terms scaling this security overall this good learning experience good exposure both internet technologies application algorithms

可能出现什么问题？任何帮助表示赞赏。

从python脚本执行时，C ++应用程序的行为有所不同

0 个答案: