Apriori算法 - 在Java中挖掘关联规则

时间:2016-05-28 14:19:43

标签: java algorithm data-mining apriori

扫描交易以找到L1 对于(k = 2; Lk-1!空; k ++){ 从Lk-1生成Ck 计算Ck中项目集的出现次数 找Lk

L1:所有频繁的1项集的集合,其计数不小于支持 Ck:候选人k-itemsets集 Lk:Ck的子集,其数量不低于支持

交易(dataset.txt)

A, B, E   
B, D   
B, C  
A, B, D   
A, C   
B, C   
A, C   
A, B, C, E  
A, B, C   
F

支持(分钟)= 20%

我们的关联规则数据挖掘任务有多个参数和阶段:

  1. 生成候选人,然后扫描并计算,然后检查最小支持 在这里它产生C到C3然而C2不计数而C3给出错误的值。
  2. C3应该是

    {A, B, C}  
    {A, B, D}  
    {A, B, E}  
    {A, C, D}  
    {A, C, E}  
    {A, D, E}  
    {B, C, D}  
    {B, C, E}  
    {B, D, E}  
    {C, D, E}    
    

    然而,我们正在

    A B C 
    C A D 
    A E E
    

    这是一个错误的结果。 所以第一个错误是C2,C3没有计数,C3也给出了错误的结果。

    我的代码如下:

    Main.java

    package apriori;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileReader;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Scanner;
    import java.util.Set;
    import java.io.*;
    import java.util.*;
    
    @SuppressWarnings("unused")
    public class Main {
        public static int minSup = 2;
    
        public static void main(String args[]) throws IOException {
        FileWriter summary= new FileWriter("Summary.txt");
        String Freqitems = "Frequent.txt";
        String Infreqitems = "Infrequent.txt";
        long startTime = System.nanoTime();
        do{
            CandidateGen.CandGen();
            CandidateGen.CandGen();
            CandidateGen.CandGen();
    
        }while(SupportCounter.itemsize > 0);
    
    //  WRITING SUMMARY FILE        
            long endTime = System.nanoTime();
            LineNumberReader  Frelnr = new LineNumberReader(new FileReader(Freqitems));
            LineNumberReader  Infrelnr = new LineNumberReader(new FileReader(Infreqitems));
            Frelnr.skip(Long.MAX_VALUE);
            Infrelnr.skip(Long.MAX_VALUE);
            long totalTime = (endTime - startTime);
            summary.write("MinSup = "+minSup+System.getProperty("line.separator" )+
                            "Total T(C): "+CandidateGen.gettime()+" Nano seconds"+System.getProperty("line.separator" )+
                            "Total T(L): "+SupportCounter.gettime()+" Nano seconds"+System.getProperty("line.separator" )+
                            "Total Time of Execution = "+totalTime+" nano seconds"+System.getProperty("line.separator" )+
                            "Frequent itemsets: "+(Frelnr.getLineNumber() - 1)+System.getProperty("line.separator")+
                            "Infrequent itemsets: "+(Infrelnr.getLineNumber() - 1)+System.getProperty("line.separator"));
            summary.close();
            Frelnr.close();
            Infrelnr.close();
        }
    }
    

    SupportCounter.java

    package apriori;
    
    import java.io.File;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Scanner;
    
    public class SupportCounter{
        static long startTime = System.nanoTime();
        static int callcount = 0;
        static int itemsize=0;
        public static void SupCoun()
        {
        //  HashMap map = new HashMap(); 
        //  String dataset = "dataset.txt";//determines name of file
            try {
                callcount = callcount +1;
    
                if(callcount ==1){
    
                    HashMap map = new HashMap(); 
                String dataset = "Dataset.txt";//determines name of file
            //  callcount = callcount +1;
                FileWriter Lk = new FileWriter("L"+callcount+".txt");
                Scanner list = new Scanner(new File(dataset));
    
    
                while (list.hasNext()) {
    
                    String word = list.next();
                    if (!list.hasNext()){
                        Lk.write("Time of Execution : "+ gettime()+" nano seconds"+System.getProperty("line.separator" ));
                    }
                    if(map.containsKey(word)) {
                        //itemsize=1;
                      Integer count = (Integer)map.get(word);
                      map.put(word, new Integer(count.intValue() + 1));
                    } else {
                       map.put(word, new Integer(1));
                    }
                  }
                ArrayList arraylist = new ArrayList(map.keySet());
                Collections.sort(arraylist);
                for (int i = 0; i < arraylist.size(); i++) {
                  String key = (String)arraylist.get(i);
                  Integer count = (Integer)map.get(key);
                if( count >= Main.minSup)
                        {
                        Lk.write(key + " : " + count + System.getProperty( "line.separator" ));
                        }
                    }      
                list.close();
                Lk.close();
                }//call count = 1 if end
    
                else if(callcount > 1){ // Write Lk
                    countfre(callcount);
    
                }//else-if end
            } // Try END
    
            catch (IOException e) 
            {
                e.printStackTrace();
            }
        }//SupCoun END
    
    private static void countfre(int filenumber) throws IOException{
    
            ArrayList<String> ckwords = new ArrayList<String>();
            ArrayList<String> dbwords = new ArrayList<String>();
            FileWriter Lk = new FileWriter("L"+filenumber+".txt");
            File ck = new File("C"+filenumber+".txt");
            Scanner ckscan = new Scanner(ck);//.useDelimiter(":");
    
             File dataset = new File("Dataset.txt");
             Scanner dbscan = new Scanner(dataset).useDelimiter("\n");
    
             int j1,i1 =0;
             if(ckscan.hasNext())
             {
             ckscan.nextLine();
             }
             Lk.write("Time of Execution : "+ gettime()+" nano seconds"+System.getProperty("line.separator" ));
             while(dbscan.hasNext())
             {
    
                 String wrd = dbscan.nextLine();
                 dbwords.add(wrd);
             }
             while(ckscan.hasNext())
             {
                 String wrd2 = ckscan.nextLine();
                 ckwords.add(wrd2);
             }
             int counter =0;
             ckscan = new Scanner(ck);//.useDelimiter(":");
             if(ckscan.hasNext())
             ckscan.nextLine();
             while(ckscan.hasNext())
             {
                 dbscan = new Scanner(dataset).useDelimiter("\n");
                 String wrd2 = ckscan.nextLine();
                 ckwords.add(wrd2);
    
                 for(j1=0;j1<dbwords.size();j1++){
                     if(dbwords.get(j1).contains(wrd2)){
                         counter++;
                     }
                 }
    
                 if(counter >= Main.minSup){
    
                 Lk.write(wrd2+" : "+counter+ System.getProperty( "line.separator" ));
                 }
                // System.out.println(wrd2+"--"+counter);
           // System.out.println("--------------------------------------------------------------");
             }
             Lk.close();         
          }
    
    
    
    
    //////////////END TIMER
    public static long gettime()
        {
            long endTime   = System.nanoTime();
            long totalTime = (long) ((endTime - startTime));
            return(totalTime);
        }
    }
    

    CandidateGen.java

    package apriori;
    
    import java.io.File;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Scanner;
    
    public class CandidateGen {
        static long startTime = System.nanoTime();
        static int callcount = 0;
    
    public static void CandGen()
        {
            //HashMap map = new HashMap(); 
            try {
                String dataset = "dataset.txt";//determines name of file
                callcount = callcount +1;
    
                if(callcount == 1)
                {
                HashMap map = new HashMap();
                FileWriter Ck = new FileWriter("C"+callcount+".txt");
                FileWriter Infeq = new FileWriter("Infrequent.txt");
                FileWriter Feq = new FileWriter("Frequent.txt");
    
                Scanner list = new Scanner(new File(dataset));
                while (list.hasNext()) {
    
                    String word = list.next();
                    if (!list.hasNext()){
                        Feq.write("Time of execution : "+gettime()+" nano seconds"+System.getProperty("line.separator" ));
                        Infeq.write("Time of execution : "+gettime()+" nano seconds"+ System.getProperty("line.separator" ));
                        Ck.write("Time of execution : "+gettime()+" nano seconds"+ System.getProperty("line.separator" ));
                    }               
                    if(map.containsKey(word)) {
                      Integer count = (Integer)map.get(word);
                      map.put(word, new Integer(count.intValue() + 1));
                    } else {
                       map.put(word, new Integer(1));
                    }
                  }
                ArrayList arraylist = new ArrayList(map.keySet());
                Collections.sort(arraylist);
                for (int i = 0; i < arraylist.size(); i++) {
                  String key = (String)arraylist.get(i);
                  Integer count = (Integer)map.get(key);
    
                if( count < Main.minSup)
                {
                    Infeq.write(key  + System.getProperty( "line.separator" ));
                }
                else{
                    Feq.write(key + System.getProperty( "line.separator" ));
                    }
                Ck.write(key +" : "+count+System.getProperty( "line.separator" ));
                   }
                //System.out.println("Frequent and Infrequent items separated to frequent.txt , infrequent.txt ");
                list.close();
                Infeq.close();
                Feq.close();
                Ck.close();
                SupportCounter.SupCoun();
            } 
            /// 2-itemset
                else if(callcount == 2){ // Write Ck
                    String Lk = "L"+(callcount-1)+".txt";
                    FileWriter Ck = new FileWriter("C"+callcount+".txt");
                    HashMap map = new HashMap();
                    Scanner list = new Scanner(new File(Lk));
                    list.nextLine();
    
                    while (list.hasNext()) {
                    String word = list.next();
    
                    if(map.containsKey(word)) {
                          Integer count = (Integer)map.get(word);
    
                          map.put(word, new Integer(count.intValue() + 1));
                        } else {
                           map.put(word, new Integer(1));
                        }
                    list.nextLine();
                      }//while 
    
                    ArrayList arraylist = new ArrayList(map.keySet());
                    Collections.sort(arraylist);
    
                    Ck.write("Time of execution : "+gettime()+" nano seconds"+ System.getProperty("line.separator" ));
    
                    for (int i = 0; i < arraylist.size(); i++) {
    
                        for (int j = i+1; j < arraylist.size(); j++) {
    
                          String key = (String)arraylist.get(i);
                          String key2 = (String)arraylist.get(j);
                          Ck.write(key + " " + key2 + System.getProperty( "line.separator" ));
                       // System.out.println(key + "," + key2 );
                        }
                    }
                    Ck.close();
                    countfre(callcount);
                    SupportCounter.SupCoun();
                }//else-if end
    
        /// 3-itemset
                else if(callcount >2){
                String Lk = "L"+(callcount-1)+".txt";
                FileWriter Ck = new FileWriter("C"+callcount+".txt");
                Scanner list = new Scanner(new File(Lk));
                Scanner list2 = new Scanner(new File(Lk));
                list.nextLine();
                int c=0;
                ArrayList arraylist= new ArrayList();
                ArrayList arraylist2= new ArrayList();
                //HashMap map = new HashMap();
    
                while(list.hasNext())
                {
                    String word = list.next();
                    c++;
                    //System.out.println(word);
                    if(word.contains(":"))
                    {
                        list.nextLine();
                        c=0;
                        continue;
                    }
                    else if(c == callcount)
                    {
                        if(list.hasNext())
                        {
    
                        list.nextLine();
                        continue;
                        }
                        else
                            break;
                    }
                    //System.out.println(word);
                    arraylist.add(word);
                    }
    
                list2.nextLine();
                list2.nextLine();
                while(list2.hasNext())
                {
                    String word = list2.next();
                    c++;
                    //System.out.println(word);
                    if(word.contains(":"))
                    {
                        list2.nextLine();
                        c=0;
                        continue;
                    }
                    else if(c == callcount)
                    {
                        if(list2.hasNext())
                        {
    
                        list2.nextLine();
                        continue;
                        }
                        else
                            break;
                    }
                    //System.out.println(word);
                    arraylist2.add(word);
    
                }
                int el = 0;
                String set3,set4;
                ArrayList arraylist3= new ArrayList();
                //Scanner scanarray = new Scanner((Readable) arraylist2);
                for(int i=0;i<arraylist.size();i++)
                {
    
                    c++;
                    set3 = (String) arraylist.get(i);
                    for(int j=0;j<(arraylist2.size());j++)
                    {
    
                        set4 = (String)arraylist2.get(j);
                        if(set3.contains(set4))
                            {
                                //System.out.println(i+" "+j);
                                i++;
                                //System.out.println(i+" "+j);
                                //j++;
                                //System.out.println(i-1+" "+i+" "+(j+1)+"-");
                                String w = (String)arraylist.get(i-1);
                                String w2 = (String) arraylist.get(i);
                                //j++;
                                String w3 = (String) arraylist2.get(j+1);
                                System.out.println(w +" "+w2+" "+w3);
                                Ck.write(w+" "+w2+" "+w3+System.getProperty("line.separator"));
                                arraylist3.add(w);
                                arraylist3.add(w2);
                                arraylist3.add(w3);
                                el++;
    
                                //System.out.println(el);
                            }
                        i=i+1;
                        //j=j+2;
                        }
                    //  j=j=0;
                    }
                for(int i=0;i<el;i++)
                {
                //  System.out.println(arraylist3.get(i));
                }
                //}
                Ck.close();
    
                countfre(callcount);
    
            }//else-if end
    
        }
            catch (IOException e) 
            {
                e.printStackTrace();
            }
    
    
        }
    
    private static void countfre(int filenumber) throws IOException{
    
        ArrayList<String> ckwords = new ArrayList<String>();
        ArrayList<String> dbwords = new ArrayList<String>();
    
        File ck = new File("C2.txt");
         Scanner ckscan = new Scanner(ck);//.useDelimiter(":");
    
         File dataset = new File("dataset.txt");
         Scanner dbscan = new Scanner(dataset).useDelimiter("\n");
    
         int j1,i1 =0;
         ckscan.nextLine();
         while(dbscan.hasNext())
         {
             String wrd = dbscan.nextLine();
             dbwords.add(wrd);
         }
         while(ckscan.hasNext())
         {
             String wrd2 = ckscan.nextLine();
             ckwords.add(wrd2);
         }
         int counter =0;
         ckscan = new Scanner(ck);//.useDelimiter(":");
         ckscan.nextLine();
         while(ckscan.hasNext())
         {
             dbscan = new Scanner(dataset).useDelimiter("\n");
             String wrd2 = ckscan.nextLine();
             ckwords.add(wrd2);
             for(j1=0;j1<dbwords.size();j1++){
                 if(dbwords.get(j1).contains(wrd2)){
                     counter++;
                 }
    
         }
     //      System.out.println(wrd2+"--"+counter);
      //  System.out.println("--------------------------------------------------------------");
         }
      }
    
    
    
    
        // END of TIMER
        public static   double gettime()
        {
            long endTime   = System.nanoTime();
            double totalTime = (double) ((endTime - startTime));
            return (double) (totalTime);
        }
    }
    
    1. 从频繁项目集中挖掘规则 该代码不支持挖掘规则示例

      规则信心支持
      A =&gt; B 66%40%
      A =&gt; C 66%40%
      A =&gt; E 33%20%
      B =&gt; C 57%40%
      B =&gt; D 29%20%
      B =&gt; E 29%20%

    2. 任何批评都非常感谢。先谢谢你了

0 个答案:

没有答案