如何修复循环,因为它仅返回上一次迭代

时间:2019-03-27 11:09:05

标签: r

我有一个pdf列表和关键字列表,每个关键字都应贯穿每个pdf,如果pdf中存在,则返回TRUE

尝试了两个for循环,但是它仅返回last关键字,并且结果也不正确,所有都返回TRUE。只有一个pdf包含该单词,但所有pdf都显示TRUE


library(pdftools)
library(stringr)
library(tm)
library(filesstrings)
library(RODBC)


setwd("C:/RProject/ReadPDF/InputFiles/")

 SelectFirstKeyword <- list("new formula" , "new research", "morning up")



 ID  <- list.files("C:/RProject/ReadPDF/InputFiles/", full.names = T)

 ID_ <- ID[ID != ""]

 files   <- ID_


 for(i in 1:length(files)){

         for(j in 1:length(SelectFirstKeyword)){



           filename <- files[i]
           read <- readPDF(control = list(text = "-layout"))

           mystring <- Corpus(URISource(filename), readerControl = list(reader = read))

           lower_string <- tolower(mystring)
           CleanData  <- gsub("\n", " ",lower_string)

           second_string <- tolower(SelectFirstKeyword[j])
           print(second_string)

           mystring <- paste(CleanData,sep=" ")
           mystring_vector <- str_split(CleanData, "!")[[1]]

           FirstMatch <- second_string

           Match1 <- grepl(FirstMatch,mystring_vector[1])

           mystring <- paste(CleanData,sep=" ")
           mystring_vector <- str_split(CleanData, "!")[[1]]

           FirstMatch <- second_string

           Match1 <- grepl(FirstMatch,mystring_vector[1])


          FinalOutput <- paste(pdf_list,Outid, Match1)

          View(FinalOutput)


            if (Match1 == TRUE)
            {
                DATA1  <- paste(ID = pdf_list, Outid = FirstMatch , Keywordinnote = Match1)


            }

     }
}

View(DATA1)

### I shall email you the pdf files


预期输出为:

ID  outid       keywordinnote
1   news        TRUE
2   new formula TRUE

0 个答案:

没有答案