如何在文件中的当前单词之前和之后获取单词?

时间:2013-08-15 20:07:13

标签: c file replace words

我有一个文件“data.txt”,其中包含以下内容:http://pastebin.com/FY9ZTQX6

我试图在“<”之前和之后得到这个词。旧词是左边的那个,新词是右边的那个。这就是我到目前为止所做的:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*
Name: Marcus Lorenzana
Assignment: Final
*/

/*
 * Binary-tree node: holds one word, its number of occurrences, and the
 * links to the left and right child nodes.
 */
typedef struct node {
    char *word;           /* the word stored in this node */
    int count;            /* number of occurrences */
    struct node *left;    /* left child */
    struct node *right;   /* right child */
} node;

// Seven ints, matching the seven punctuation marks listed here: , . ? ! : ; -
// NOTE(review): presumably meant to hold one occurrence count per mark; nothing
// in the visible code reads or writes this array - confirm the intended use.
int punctuation[7];


/*
 * Forward declarations of this file's helper functions.
 * (toLower has no definition in the visible part of the file.)
 */
void insert(node **dictionary, node *entry);
char *readFile(char *filename);
void printDictionary(node *tree);
void toLower(char **word);
void getReplacementWords(char *filecontents, char **newWord, char **oldWord);


int main()
{

    char *word;
    char* filecontents = readFile("data.txt");
    char* oldWord;
    char* newWord;

    //create dictionary node
    node *dictionary;
    node *entry;

    //read words and punctuation in from the text file
    word = strtok (filecontents, " \n");

    dictionary = NULL;

    while (word != NULL)
    {
        //word = strlwr(word); 
        entry = (node *) malloc(sizeof(node));
        entry->left = entry->right = NULL;
        entry->word = malloc(sizeof(char)*(strlen(word)+1));
        entry->word = word;
        insert(&dictionary,entry);
        word = strtok (NULL, " \n");
    }

    //printDictionary(dictionary);

    filecontents = readFile("data.txt"); 
    getReplacementWords(filecontents,&newWord,&oldWord);




    return 0;
}

/*
 * Insert `entry` into the binary search tree rooted at `*dictionary`,
 * ordered by strcmp() on the stored word.
 *
 *  - Word not yet in the tree: `entry` is linked in at the empty slot and
 *    its count is set to 1. The caller must have set entry->left and
 *    entry->right to NULL.
 *  - Word already in the tree: the EXISTING node's count is incremented.
 *    `entry` is not linked and is left untouched; it stays owned by the
 *    caller.
 */
void insert(node ** dictionary, node * entry)
{
    if(!(*dictionary))
    {
        *dictionary = entry;
        entry->count=1;
        return;
    }

    int result = strcmp(entry->word,(*dictionary)->word);

    if(result<0){
        insert(&(*dictionary)->left, entry);
    }
    else if(result>0){
        insert(&(*dictionary)->right, entry);
    } else {
        // duplicate word: count it on the node that is already in the tree
        (*dictionary)->count++;
    }

}

//put file contents in string for strtok
/*
 * Read the whole file `filename` into a newly allocated, NUL-terminated
 * buffer.
 *
 * Returns the buffer (the caller must free() it), or NULL if the file
 * cannot be opened, its size cannot be determined, memory cannot be
 * allocated, or a read error occurs. The file is closed on every path.
 */
char* readFile(char* filename)
{
    FILE* file = fopen(filename,"r");
    if(file == NULL)
    {
        return NULL;
    }

    char* content = NULL;

    if(fseek(file, 0, SEEK_END) == 0)
    {
        long int size = ftell(file);
        if(size >= 0)
        {
            rewind(file);

            // calloc zero-fills, so the buffer stays NUL-terminated even
            // if fread delivers fewer than `size` bytes.
            content = calloc((size_t)size + 1, 1);
            if(content != NULL)
            {
                fread(content,1,(size_t)size,file);
                if(ferror(file))
                {
                    free(content);
                    content = NULL;
                }
            }
        }
    }

    fclose(file);
    return content;
}

/*
 * Print every word of the tree, one per line: left subtree first, then
 * the node's own word, then the right subtree. An empty tree (NULL)
 * prints nothing.
 */
void printDictionary(node * dictionary)
{
    // Covers both the empty tree and the missing children of a leaf.
    if(dictionary == NULL) {
        return;
    }

    printDictionary(dictionary->left);

    printf("%s\n",dictionary->word);

    printDictionary(dictionary->right);
}


/*
 * Scan `filecontents` token by token (separators: space and newline),
 * printing each token, and locate the first "old < new" triple.
 *
 * filecontents  text to scan; modified in place by strtok(). May be NULL.
 * newWord       receives the token that follows the first "<".
 * oldWord       receives the token that precedes the first "<".
 *
 * Both outputs are set to NULL first and stay NULL when no such token is
 * found. Non-NULL results point INTO `filecontents`, so they are only
 * valid while that buffer is alive.
 */
void getReplacementWords(char *filecontents, char **newWord, char **oldWord) {
    char *previous = NULL;   // token seen just before the current one
    int awaitingNew = 0;     // set right after the "<" that was recorded
    int found = 0;           // set once the first old word is recorded
    char *word;

    if (oldWord != NULL) {
        *oldWord = NULL;
    }
    if (newWord != NULL) {
        *newWord = NULL;
    }
    if (filecontents == NULL) {
        return;              // strtok() must not be started on NULL
    }

    word = strtok (filecontents, " \n");

    while (word != NULL)
    {
        printf("\n%s",word);

        if (awaitingNew) {
            // this is the token immediately after the recorded "<"
            if (newWord != NULL) {
                *newWord = word;
            }
            awaitingNew = 0;
        }

        if (strcmp(word,"<") == 0) {
            printf("\nFound replacement identifier");
            // a "<" with nothing before it has no old word to report
            if (!found && previous != NULL) {
                if (oldWord != NULL) {
                    *oldWord = previous;
                }
                awaitingNew = 1;
                found = 1;
            }
        }

        previous = word;
        word = strtok (NULL, " \n");
    }
}

4 个答案:

答案 0 :(得分:2)

你可以用 fscanf(filename , "%s < %s" , firstStringContainer , secondStringContainer)

使用 fseek 定位到包含 < 字符的那一行之后,这会把 < 之前的字符串存入 firstStringContainer,把 < 之后的字符串存入 secondStringContainer。这是您要使用的代码:

int found = 0;
char buffer[chooseYourSize];
char firstStringContainer[chooseYourSize] , secondStringContainer[chooseYourSize];
while(fgets(buffer , sizeof(buffer) , filename) != NULL)
{
    if(strchr(buffer , '<'))
    {
         found++;
         break;
    }
}
if(found)
{
     fscanf(file , "%s < %s" , firstStringContainer , secondStringContainer);
}

当然,这仅在目标行只包含三个元素(字符串 < 字符串)时才有效,而这里正是这种情况。

答案 1 :(得分:1)

如果您的数据格式为 STRING1 < STRING2,您可以这样做:

// Parses "STRING1 < STRING2": string1 receives the word before '<', string2 the word after it.
fscanf(file,"%s < %s", string1, string2);

如果它位于一行中间的某个位置,那就会更困难。您可以做的是从文件中逐行读取并放入缓冲区,然后找到 >,退回到第一个字符串的开头,再读取您想要的内容。

// Scans the file line by line for "word > word". For the data format in the
// question, use " < " in both the strstr() and sscanf() patterns instead.
while(fgets(buff,sizeof(buff),file) != NULL)
{
    if( (pointer = strstr(buff," > ")) != NULL)
     {
        //now you have located the > just go back
        //in the buff till you reach the start of
        //string1 and then use
        char *start = pointer;
        // skip any extra blanks between string1 and the marker
        while(start > buff && isspace((unsigned char)start[-1]))
            start--;
        // walk back over the characters of string1 itself
        while(start > buff && !isspace((unsigned char)start[-1]))
            start--;
        sscanf(start,"%s > %s",string1, string2);
     }
}

我这样做已经有一段时间了,所以可能会出现语法错误

答案 2 :(得分:0)

您可以在循环中使用 fseek() 向前/向后跳过 1 个元素,并检查它是 space、> 还是 other needed character(可以借助 string.h 中的其他函数)。

当您找到此符号时,您可以将指针向前/向后移动到另一个 space 或 other needed character,记住跳过的字符数 N,然后把这 N 个字符复制到字符串变量中。

substitute < replacement
           ^ find this symbol

substitute < replacement
          ^ make a loop that  makes `counter++` when it finds `space`
            (int counter  = 0;)

 substitute < replacement
^ the loop will continue and will find the 2nd `space`, and make `counter++`
  when `counter == 2` (1 space after and 1 before the word) the loop stops. 
  Now `file` pointer points to the `space` symbol before the 1st word.
  Then skip 1 element forward (using `fseek()`) and now you have
  `file` pointer that points to the 1st word.
  And now you can do whatever you want!

执行相同的操作以找到第二个单词(file指针将指向第二个单词,这样您就可以再次调用此函数:它将在您的第二个>中查找文本)并创建一个函数findWordsNearArrow()或类似的东西。

你可以在一个循环中调用这个函数,所以当它找到EOF时,它会返回你可以用来退出循环的特定值。

再想一想。 (c)中

答案 3 :(得分:0)

使用 fgets() 和 strchr() 定位到包含 < 的行。

// Read lines until one contains '<'. fgets() returns NULL at end of file
// (or on error), so that must be tested before the result reaches strchr().
char *line;
while ((line = fgets (buffer, sizeof (buffer), file)) != NULL
       && strchr (line, '<') == NULL)
     ;       // do nothing
// line == NULL here means no line containing '<' was found

然后使用strtok()来解析缓冲区中的当前行

strcpy (oldword, strtok (buffer, "<"));
strcpy (newword, strtok (NULL, "\n"));