阅读ICD文本文件时应用规则的冲突

时间:2014-04-09 14:45:53

标签: java text

行阅读规则:

  1. 没有缩进/破折== 重大疾病
  2. 一个缩进/短划线( - )== subIndent1
  3. 两个缩进/短划线( - )== subIndent2
  4. 三个缩进/破折号(---)== subIndent3
  5. 四个缩进/短划线(----)== subIndent4
  6. 五个缩进/破折号(-----)== subIndent5
  7. 六个缩进/短划线(-----)== subIndent6
  8. Disease.txt (原始短样本)
    http://www.cdc.gov/nchs/data/dvs/2e_volume3_2014.pdf(完整疾病ICD文件样本)

    Aars Q87.1
    Abdomen, abdominal — see also condition
    - acute R10.0
    -- convulsive 
    equivalent G40.8
    Abdominalgia R10.4
    Abduction contracture, hip or other joint —see Contraction, joint
    Aberrant (congenital) — see also Malposition, congenital
    - adrenal gland Q89.1
    - artery (peripheral) NEC Q27.8
    - breast Q83.8
    Aberration, mental F99.0
    - endocrine gland NEC Q89.2
    - hepatic duct Q44.5
    - pancreas Q45.3
    -- vein (peripheral) 
    pender NEC Q27.8
    

    我试过以下代码。以上所有规则在逐行阅读疾病文本文件时都能正常工作。

    主要疾病之豆:DiseaseCategory.java

    public class DiseaseCategory {
            private int mdId;
            private String mdName;
            private String mdCode;
    
            DiseaseCategory(int mdId, String mdName, String mdCode){
                this.mdId=mdId;
                this.mdName=mdName;
                this.mdCode=mdCode;
    //setter and getters below
            }
    

    子缩进疾病的Bean:SubDiseaseCategory .java

    public class SubDiseaseCategory {
    SubDiseaseCategory(int sdId,    int mdId,   String sdIndent1,String sdCode1,
        String sdIndent2, String sdCode2, String sdIndent3,String sdCode3,String sdIndent4,String sdCode4,
        String sdIndent5,String sdCode5,String sdIndent6, String sdCode6)
    {
            this.sdId=sdId;
            this.mdId=mdId;
            this.sdIndent1=sdIndent1;
            this.sdCode1=sdCode1;
            this.sdIndent2=sdIndent2;
            this.sdCode2=sdCode2;
            this.sdIndent3=sdIndent3;
            this.sdCode3=sdCode3;
            this.sdIndent4=sdIndent4;
            this.sdCode4=sdCode4;
            this.sdIndent5=sdIndent5;
            this.sdCode5=sdCode5;
            this.sdIndent6=sdIndent6;
            this.sdCode6=sdCode6;
        }
    
        private int sdId;   
        private int mdId;   
        private String sdIndent1;   
        private String sdCode1  ;
        private String sdIndent2;   
        private String sdCode2  ;   
        private String sdIndent3;   
        private String sdCode3  ;
        private String sdIndent4;   
        private String sdCode4  ;
        private String sdIndent5;   
        private String sdCode5  ;   
        private String sdIndent6;   
        private String sdCode6  ;
    //all setters and getters
    }
    

    在DataMigeration.java中执行的主要功能

    public class DataMigeration
    {
    
    public static void main(String args[]) throws IOException
    {   
            String FILE_NAME="c://com/best/uibeans/diseases.txt";
            File file=new File(FILE_NAME);
    
            DataMigeration ob=new DataMigeration();
            //ob.connection();
            ob.readAndSaveFile(file);
    }
    private Connection conn=null;
    private Statement stmt=null;
    
    public void connection() {
        String userName = "guest";
        String password = "";
        String url="jdbc:sqlserver://JAVASERVER2\\MTS;databaseName=ICD";
        try {   
        Class.forName("com.microsoft.sqlserver.jdbc.SQLServerDriver");
            System.out.println("Driver Loaded.........");
        } catch (ClassNotFoundException e) {
            System.out.println("Driver not Loaded..........");
        e.printStackTrace();
    }
    try {
        conn = DriverManager.getConnection(url, userName, password);    
     System.out.println("Connection  Established..........");
        stmt=conn.createStatement();    
    } catch (SQLException e) {
        e.printStackTrace();
    System.out.println("Connection not Established..........");
    }   
    }
    public void SaveMajorDisease(int mdId, String mdName, String mdCode){
        try {
            String query="";
            mdName= mdName.replace("'", "");
            mdName=mdName.trim();
    
            query="insert into MAJOR_DISEASE(md_id, md_name, md_code) " +
                    "values ("+mdId+",'"+mdName+"','"+mdCode+"')";
            c(query);       
            stmt.executeUpdate(query);
            System.out.println("Data Inserted Succesfully....");
        } catch (SQLException e) {
            System.out.println("Data Not Inserted Succesfully....");
            e.printStackTrace();
        }
    }
    
    public void SaveSubDisease(
            int sdId,           int mdId,
            String sdIndent1,   String sdCode1,
            String sdIndent2,   String sdCode2,
            String sdIndent3,   String sdCode3,
            String sdIndent4,   String sdCode4,
            String sdIndent5,   String sdCode5,
            String sdIndent6,   String sdCode6  
            ){
        try {
            String query="";
            sdIndent1= sdIndent1.replace("'", "");
            sdIndent1=sdIndent1.trim();
    
            sdIndent2= sdIndent2.replace("'", "");
            sdIndent2=sdIndent2.trim();
    
            sdIndent3= sdIndent3.replace("'", "");
            sdIndent3=sdIndent3.trim();
    
            sdIndent4= sdIndent4.replace("'", "");
            sdIndent4=sdIndent4.trim();
    
            sdIndent5= sdIndent5.replace("'", "");
            sdIndent5=sdIndent5.trim();
    
            sdIndent6= sdIndent6.replace("'", "");
            sdIndent6=sdIndent6.trim();
    
    
             query="insert into SUB_DISEASE(sd_id, md_id, " +
                    " sd_indent1, sd_code1 , " +
                    " sd_indent2, sd_code2 , " +
                    " sd_indent3, sd_code3 , " +
                    " sd_indent4, sd_code4 , " +
                    " sd_indent5, sd_code5 , " +
                    " sd_indent6, sd_code6  " +
                    " ) " +
                    "values ("+sdId+","+mdId+", " +
                    " '"+sdIndent1+"' , '"+sdCode1+"' , " +
                    "'"+sdIndent2+"' , '"+sdCode2+"' ," +
                    "'"+sdIndent3+"' , '"+sdCode3+"' ," +
                    "'"+sdIndent4+"' , '"+sdCode4+"' ," +
                    "'"+sdIndent5+"' , '"+sdCode5+"' ," +
                    "'"+sdIndent6+"' , '"+sdCode6+"' " +
                    " ) ";
            c(query);
            stmt.executeUpdate(query);
            System.out.println("Data Inserted Succesfully....");
        } catch (SQLException e) {
            System.out.println("Data Not Inserted Succesfully....");
            e.printStackTrace();
        }
    }
    
    public void readAndSaveFile(File file)
    {
        List<DiseaseCategory> majorDiseaseList=new ArrayList<DiseaseCategory>();
        List<SubDiseaseCategory> subDiseaseList=new ArrayList<SubDiseaseCategory>();
        try
         {
             String data="";
             String line;
             BufferedReader br = new BufferedReader(new FileReader(file));
            String majorDiseaseCode="";
            String majorDisease="";
    
             String sdIndent1="";   
             String sdCode1=""  ;
    
             String sdIndent2="";   
             String sdCode2=""  ;
    
             String sdIndent3="";   
             String sdCode3=""  ;
    
             String sdIndent4="";   
             String sdCode4=""  ;
    
             String sdIndent5="";   
             String sdCode5=""  ;
    
             String sdIndent6="";   
             String sdCode6=""  ;
    // _________________________________________________________//
    /* major disease record:    majorDiseaseList                */      
    int mdId=0;
    int sdId=0;                 
    
             while ((line = br.readLine()) != null)
             {
    
                majorDisease="";
                majorDiseaseCode="";
       if(!line.equals("")){
                if(lineContainsNoIndentAtFirst(line)==0)
                    {   
                        String tokens[]=line.split(" ");    
                    for(int i=0;i<tokens.length;i++)
                    {
                        if(tokens[i].contains("."))     if(tokens[i]!="0")  majorDiseaseCode=tokens[i];
                        if(!tokens[i].contains("."))        if(tokens[i]!="0")  majorDisease+=" "+tokens[i];                    
    
                    }   
                    if(!majorDisease.equals(null) && !majorDisease.equals("") && majorDisease!="" && majorDisease!=null){
    
                            mdId++;                                             
                            majorDiseaseList.add(new DiseaseCategory(mdId, majorDisease, majorDiseaseCode));
                    }                           
                }//end if
                else if(lineContainsNoIndentAtFirst(line)!=0)// for no dash or no any suffix at beginning
                {        
                    sdId++;             
                    if(lineContainsOneIndentAtFirst(line)==0) // for one '-' only
                    {
                        String tokens1[]=line.split(" ");
                        sdIndent1="";
                            for(int i=0;i<tokens1.length;i++)
                            {
                                if(tokens1[i].contains("."))        if(tokens1[i]!="0") sdCode1=tokens1[i];
                                if(!tokens1[i].contains("."))       if(tokens1[i]!="0") sdIndent1+=" "+tokens1[i];
                                sdIndent1= sdIndent1.replace("-", "");
                            }
                                subDiseaseList.add(new SubDiseaseCategory(
                                                                            sdId,
                                                                            mdId,
                                                                            sdIndent1,
                                                                            sdCode1,"","","","","","","","","",""
                                                                        ));
                    }//end if: lineContainsOneIndentAtFirst()
                    else if(lineContainsTwoIndentAtFirst(line)==0) // for two '--' only
                    {
                        String tokens2[]=line.split(" ");
                        sdIndent2="";
                            for(int i=0;i<tokens2.length;i++)
                            {
                                if(tokens2[i].contains("."))        if(tokens2[i]!="0") sdCode2=tokens2[i];
                                if(!tokens2[i].contains("."))       if(tokens2[i]!="0") sdIndent2+=" "+tokens2[i];
                                sdIndent2= sdIndent2.replace("-", "");
                            }
                                subDiseaseList.add(new SubDiseaseCategory(
                                                                            sdId,
                                                                            mdId,"","",
                                                                            sdIndent2,
                                                                            sdCode2,"","","","","", "", "", ""                              
                                                                        ));                                                         
                    }//end if: lineContainsTwoIndentAtFirst()           
                    else if(lineContainsThreeIndentAtFirst(line)==0) // for three '---' only
                    {
                        String tokens3[]=line.split(" ");
                        sdIndent3="";
                            for(int i=0;i<tokens3.length;i++)
                            {
                                if(tokens3[i].contains("."))        if(tokens3[i]!="0") sdCode3=tokens3[i];
                                if(!tokens3[i].contains("."))       if(tokens3[i]!="0") sdIndent3+=" "+tokens3[i];
                                sdIndent3= sdIndent3.replace("-", "");
                            }
                                subDiseaseList.add(new SubDiseaseCategory(
                                                                            sdId,
                                                                            mdId,"","","","",
                                                                            sdIndent3,
                                                                            sdCode3,"","","",   "", "", ""                              
                                                                        ));                                                         
                    }//end if: lineContainsThreeIndentAtFirst()         
                    else if(lineContainsFourIndentAtFirst(line)==0) // for Four '----' only
                    {
                        String tokens4[]=line.split(" ");
                        sdIndent4="";
                            for(int i=0;i<tokens4.length;i++)
                            {
                                if(tokens4[i].contains("."))        if(tokens4[i]!="0") sdCode4=tokens4[i];
                                if(!tokens4[i].contains("."))       if(tokens4[i]!="0") sdIndent4+=" "+tokens4[i];
                                sdIndent4= sdIndent4.replace("-", "");
                            }
                                subDiseaseList.add(new SubDiseaseCategory(
                                                                            sdId,
                                                                            mdId,"","","","","","",
                                                                            sdIndent4,
                                                                            sdCode4,"", "", "", ""                              
                                                                        ));                                                         
                    }//end if: lineContainsFourIndentAtFirst()          
                    else if(lineContainsFiveIndentAtFirst(line)==0) // for Four '----' only
                    {
                        String tokens5[]=line.split(" ");
                        sdIndent5="";
                            for(int i=0;i<tokens5.length;i++)
                            {
                                if(tokens5[i].contains("."))        if(tokens5[i]!="0") sdCode5=tokens5[i];
                                if(!tokens5[i].contains("."))       if(tokens5[i]!="0") sdIndent5+=" "+tokens5[i];
                                sdIndent5= sdIndent5.replace("-", "");
                            }
    
                                subDiseaseList.add(new SubDiseaseCategory(
                                                                            sdId,
                                                                            mdId,"","","","","","","","",
                                                                            sdIndent5,
                                                                            sdCode5,    "", ""                              
                                                                        ));                                                         
                    }//end if: lineContainsFiveIndentAtFirst()          
                    else if(lineContainsSixIndentAtFirst(line)==0) // for Four '----' only
                    {
                        String tokens6[]=line.split(" ");
                            for(int i=0;i<tokens6.length;i++)
                            {
                                if(tokens6[i].contains("."))        if(tokens6[i]!="0") sdCode6=tokens6[i];
                                if(!tokens6[i].contains("."))       if(tokens6[i]!="0") sdIndent6+=" "+tokens6[i];
                                sdIndent6= sdIndent6.replace("-", "");
                            }                       
                                subDiseaseList.add(new SubDiseaseCategory(
                                                                            sdId,
                                                                            mdId,"","","","","","","","","","",
                                                                            sdIndent6,
                                                                            sdCode6                             
                                                                        ));                         
                    }//end if: 5indent          
             }//end if: suffix: '-'
            }//if for not null index
          }// end while loop for line by line reading from text
    //Retrieving.....
    int maxRecords=1;         
    int masterMax=1;
          for(DiseaseCategory obj: majorDiseaseList){
                  masterMax++;                
    //            System.out.print("\n"+obj.getMdId()+" : "+obj.getMdName()+" : "+obj.getMdCode());
                  //saving Major disease
                  String mdName=obj.getMdName();
    //              String tokensEscape[]=mdName.split("\'");
    //                  for(int i=0;i<tokensEscape.length;i++)  mdName+="\'"+tokensEscape[i];
    //                  
    //                    System.out.print("\n"+obj.getMdId()+" : "+mdName+" : "+obj.getMdCode());
    //                  
                  SaveMajorDisease(obj.getMdId(),mdName,obj.getMdCode());             
          }
    
    
          System.out.println("\n____size:"+subDiseaseList.size()+"_________________________________________________________________________________________________________________________________\n");
    
          for(SubDiseaseCategory obj: subDiseaseList){
                  maxRecords++;       
                  String msg="";
                  msg+= obj.getSdId()+"     : "+obj.getMdId();
                  if(!obj.getSdIndent1().equals(""))    msg+= ":    i:      "+obj.getSdIndent1()+" :             \t\t\tcode1:   "+obj.getSdCode1();
                  if(!obj.getSdIndent2().equals(""))    msg+= ":    ii:     "+obj.getSdIndent2()+" :             \t\t\tcode2:   "+obj.getSdCode2();
                  if(!obj.getSdIndent3().equals(""))    msg+= ":    iii:    "+obj.getSdIndent3()+" :             \t\t\tcode3:   "+obj.getSdCode3();
                  if(!obj.getSdIndent4().equals(""))    msg+= ":    iv:     "+obj.getSdIndent4()+" :             \t\t\tcode4:   "+obj.getSdCode4();
                  if(!obj.getSdIndent5().equals(""))    msg+= ":    v:  "+obj.getSdIndent5()+" :                 \t\t\tcode5:   "+obj.getSdCode5();
                  if(!obj.getSdIndent6().equals(""))    msg+= ":    vi:     "+obj.getSdIndent6()+" :             \t\t\tcode6:   "+obj.getSdCode6();
            //    System.out.println(msg);  
    
    
    //saving sub_disease              
                  SaveSubDisease(
                                    obj.getSdId(),          obj.getMdId(),
                                    obj.getSdIndent1(),     obj.getSdCode1(),
                                    obj.getSdIndent2(),     obj.getSdCode2(),
                                    obj.getSdIndent3(),     obj.getSdCode3(),
                                    obj.getSdIndent4(),     obj.getSdCode4(),
                                    obj.getSdIndent5(),     obj.getSdCode5(),
                                    obj.getSdIndent6(),     obj.getSdCode6()
                                );
          }
    
          br.close();
    }
    catch(IOException ioe){}
    }
    
    public  int lineContainsNoIndentAtFirst(String str)
    {       
          int contains=0;         
    
    //  if(!str.startsWith("-"))
    //  {
              String dashCheck = str.substring(0,1)+""; 
              if( dashCheck.contains("-"))   contains++;
              if(!dashCheck.contains("-"))   contains=0;
    
    //          String tokensDotCheck[]=str.split(" ");
    //              for(int i=0;i<tokensDotCheck.length;i++)
    //              {
    //                  if(i==0){                       
    //                      if(tokensDotCheck[i].contains(".")){
    //                          contains++;
    //                      }                       
    //                  }
    //              }
    //        
    //  }else{
    //      contains++;
    //  }
    
              return contains;      
    }
    public  int lineContainsOneIndentAtFirst(String str)
    {
              String dashCheck = str.substring(0,3)+""; 
              int contains=0;         
              if(dashCheck.contains("- -"))  contains++;                  
              return contains;      
    }
    public  int lineContainsTwoIndentAtFirst(String str){       
              String dashCheck = str.substring(0,6)+""; 
              int contains=0;
              if(dashCheck.contains("- - - ")) contains++;
              return contains;      
    }
    public  int lineContainsThreeIndentAtFirst(String str){     
            String dashCheck = str.substring(0,8)+"";   
            int contains=0;
            if(dashCheck.contains("- - - - ")) contains++;    
          return contains;      
    }
    public  int lineContainsFourIndentAtFirst(String str){      
          String dashCheck = str.substring(0,10)+"";    
          int contains=0;
         if(dashCheck.contains("- - - - - "))   contains++;
          return contains;      
    }
    public  int lineContainsFiveIndentAtFirst(String str){      
          String dashCheck = str.substring(0,12)+"";    
          int contains=0;
         if(dashCheck.contains("- - - - - - "))  contains++;
          return contains;      
    }
    public  int lineContainsSixIndentAtFirst(String str){       
          String dashCheck = str.substring(0,14)+"";    
          int contains=0;
         if(dashCheck.contains("- - - - - - "))  contains++;
          return contains;      
    }
    public void c(String msg){  System.out.print("\n|   "+msg);}
    
    }
    

    以上代码在规则上工作正常,但问题如下,
    问题:

    Abdomen, abdominal — see also condition
    - acute R10.0
    -- convulsive 
    equivalent G40.8
    

    包裹的子病缩进依据规则1,没有缩进/短划线等于主要疾病但是它是包含在上面的行的文本。它不应该计入主要疾病但它应该计入先前的亚病缩行 如何解决这场冲突?
    可能我已经分享了所有的东西,如果任何人有任何困难,你可以通过评论提出答案。我会告诉你。

0 个答案:

没有答案