从Apriori算法生成Eclat算法

时间:2011-12-27 22:03:24

标签: java data-mining apriori

我正在尝试将Apriori算法转换为Eclat算法。 我的Apriori算法以水平格式在垂直项目中执行事务,并返回第n个频繁项目集。

 Eclat算法我需要在垂直方向设置项目,并且应该在横向上处理事务。和我的Apriori一样,它需要返回项目集的交集     
项目集
0 1 1 0 1 1 1 0 0 1 1 1 1 1 1交易
0 1 1 0 1 1 1 0 0 1 0 1 1 1 0
0 1 1 0 0 1 1 0 0 1 0 1 1 1 1 0 0 1 0 1 1 1 0 0 1 1 1 1 1 1 0 1 1 0 1 1 0 1 0 1 1 1 1 1 0
0 1 1 1 1 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 0 0 1 0 1 1 1 1
1 1 1 1 1 1 0 0 0 1 1 1 1 0 1
0 1 1 1 1 1 0 0 0 1 0 1 1 0 1
1 1 1 1 1 0 0 0 0 1 1 1 1 0 0
转置它不是问题,通过水平搜索找到频繁项目集是。

private void generateCandidates(int n)
{
    Vector<String> tempCandidates = new Vector<String>(); //temporary candidate string vector
    String str1, str2; //strings that will be used for comparisons
    StringTokenizer st1, st2; //string tokenizers for the two itemsets being compared

    //if its the first set, candidates are just the numbers
    if(n==1)
    {
        for(int i=1; i<=numItems; i++)
        {
            tempCandidates.add(Integer.toString(i));
        }
    }
    else if(n==2) //second itemset is just all combinations of itemset 1
    {
        //add each itemset from the previous frequent itemsets together
        for(int i=0; i<candidates.size(); i++)
        {
            st1 = new StringTokenizer(candidates.get(i));
            str1 = st1.nextToken();
            for(int j=i+1; j<candidates.size(); j++)
            {
                st2 = new StringTokenizer(candidates.elementAt(j));
                str2 = st2.nextToken();
                tempCandidates.add(str1 + " " + str2);
            }
        }
    }
    else
    {
        //for each itemset
        for(int i=0; i<candidates.size(); i++)
        {
            //compare to the next itemset
            for(int j=i+1; j<candidates.size(); j++)
            {
                //create the strigns
                str1 = new String();
                str2 = new String();
                //create the tokenizers
                st1 = new StringTokenizer(candidates.get(i));
                st2 = new StringTokenizer(candidates.get(j));

                //make a string of the first n-2 tokens of the strings
                for(int s=0; s<n-2; s++)
                {
                    str1 = str1 + " " + st1.nextToken();
                    str2 = str2 + " " + st2.nextToken();
                }

                //if they have the same n-2 tokens, add them together
                if(str2.compareToIgnoreCase(str1)==0)
                    tempCandidates.add((str1 + " " + st1.nextToken() + " " + st2.nextToken()).trim());
            }
        }
    }
    //clear the old candidates
    candidates.clear();
    //set the new ones
    candidates = new Vector<String>(tempCandidates);
    tempCandidates.clear();
}

/************************************************************************
 * Method Name  : calculateFrequentItemsets
 * Purpose      : Determine which candidates are frequent in the n-th itemsets
 *              : from all possible candidates
 * Parameters   : n - iteger representing the current itemsets being evaluated
 * Return       : None
 *************************************************************************/
private void calculateFrequentItemsets(int n)
{
    Vector<String> frequentCandidates = new Vector<String>(); //the frequent candidates for the current itemset
    FileInputStream file_in; //file input stream
    BufferedReader data_in; //data input stream
    FileWriter fw;
    BufferedWriter file_out;

    StringTokenizer st, stFile; //tokenizer for candidate and transaction
    boolean match; //whether the transaction has all the items in an itemset
    boolean trans[] = new boolean[numItems]; //array to hold a transaction so that can be checked
    int count[] = new int[candidates.size()]; //the number of successful matches

    try
    {
            //output file
            fw= new FileWriter(outputFile, true);
            file_out = new BufferedWriter(fw);
            //load the transaction file
            file_in = new FileInputStream(transaFile);
            data_in = new BufferedReader(new InputStreamReader(file_in));

            //for each transaction
            for(int i=0; i<numTransactions; i++)
            {
                //System.out.println("Got here " + i + " times"); //useful to debug files that you are unsure of the number of line
                stFile = new StringTokenizer(data_in.readLine(), itemSep); //read a line from the file to the tokenizer
                //put the contents of that line into the transaction array
                for(int j=0; j<numItems; j++)
                {
                    trans[j]=(stFile.nextToken().compareToIgnoreCase(oneVal[j])==0); //if it is not a 0, assign the value to true
                }

                //check each candidate
                for(int c=0; c<candidates.size(); c++)
                {
                    match = false; //reset match to false
                    //tokenize the candidate so that we know what items need to be present for a match
                    st = new StringTokenizer(candidates.get(c));
                    //check each item in the itemset to see if it is present in the transaction
                    while(st.hasMoreTokens())
                    {
                        match = (trans[Integer.valueOf(st.nextToken())-1]);
                        if(!match) //if it is not present in the transaction stop checking
                            break;
                    }
                    if(match) //if at this point it is a match, increase the count
                        count[c]++;
                }

            }
            for(int i=0; i<candidates.size(); i++)
            {
                //  System.out.println("Candidate: " + candidates.get(c) + " with count: " + count + " % is: " + (count/(double)numItems));
                //if the count% is larger than the minSup%, add to the candidate to the frequent candidates
                if((count[i]/(double)numTransactions)>=minSup)
                {
                    frequentCandidates.add(candidates.get(i));
                    //put the frequent itemset into the output file
                    file_out.write(candidates.get(i) + "," + count[i]/(double)numTransactions + "\n");
                }
            }
            file_out.write("-\n");
            file_out.close();
    }
    //if error at all in this process, catch it and print the error messate
    catch(IOException e)
    {
        System.out.println(e);
    }
    //clear old candidates
    candidates.clear();
    //new candidates are the old frequent candidates
    candidates = new Vector<String>(frequentCandidates);
    frequentCandidates.clear();
}

}

0 个答案:

没有答案