在C中,如何删除char数组中相同且连续的行?

时间:2018-04-26 02:47:54

标签: c arrays char

我正在寻找创建功能的帮助。

函数deleteIdents()将删除char数组中的相同行,因为它们是连续的。它将保留一条完全相同的行。

我不需要检查整条线是否相同。只有前79个字符MAXCHARS才适合这种情况。

因此,例如,如果我的数组包含

from itertools import cycle


abc = ['a', 'b', 'c', 'd', 'e' ]

alfs = ''

for n, e in enumerate(cycle(abc)):  # lazy enumeration?
    alfs += e
    if n >= 18:  # must have stopping test to break infinite loop
        break
alfs
Out[30]: 'abcdeabcdeabcdeabcd'

它将被更改为

Hello World
Hi World
Hello World
Hello World
Hello World
Hi there

在我的脑海中,该功能看起来类似于:

Hello World
Hi World
Hello World
Hi there

但我不确定。如果您有解决方案,我会很高兴并感谢您听到它:)

4 个答案:

答案 0 :(得分:1)

读取第一行而不是第二行,如果它们相等则比较它们进入循环直到它们不相等。所以这是代码:

char *first_line = malloc(MAXLINE);
char *second_line = malloc(MAXLINE);

getline(first_line);

do {
   getline(second_line);
} while (strcmp (first_line, second_line));

对于getline()实施搜索,有很多例子。或here你有我的。

答案 1 :(得分:1)

另一个例子如何实现。想法是保持2个指针,并且只有在条目不同时才增加第一个指针。还分配了一些额外的存储空间以避免已被覆盖的条目的内存泄漏。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int unique(char **strings, int size) {
    if (!strings) {
        return -1;
    }
    int head = 0, newHead = 0, duplicatedElementsHead = 0;
    //Save duplicates to avoid memory leaks
    char** duplicatedEntries = malloc(size*sizeof(char*));

    while (head < size) {
        //String are the same
        if (!strcmp(strings[head], strings[newHead])) {
            if (head != newHead) {
                duplicatedEntries[duplicatedElementsHead++] = strings[newHead];
            }
            ++head;
        } else {
            strings[++newHead] = strings[head++];
        }
    }

    //Put duplicated entries after new end
    int idx = 0, tmpHead = newHead + 1;
    for (; idx < duplicatedElementsHead; ++idx) {
        strings[tmpHead++] = duplicatedEntries[idx];
    }

    free(duplicatedEntries);
    return newHead;
}

int main() {
    char **strings = malloc(8*sizeof(char*));
    strings[0] = "Hello World";
    strings[1] = "Hi World";
    strings[2] = "Hi World";
    strings[3] = "Hello World";
    strings[4] = "Hello World";
    strings[5] = "Hi there";
    strings[6] = "Hia";
    strings[7] = "Hi";
    int newEnd = unique(strings, 8);
    for (int i=0; i < newEnd; ++i) {
        printf("%s\n", strings[i]);
    }
    free(strings);
}

答案 2 :(得分:0)

您实际上是在编写unix / linux实用程序的核心功能,'uniq'。

cat filename | sort | uniq > newfile
#or skip sort, since you didn't mention
cat filename | uniq > newfile

你可以使用popen和uniq(类似这样......)

FILE *uniqfh;
uniqfh = popen("cat file1 | uniq" , "r");
if (uniqfh == NULL) { //handle error
}
while( fgets(uniqfh, buffer, buffersize) ) printf("%s\n",buffer);

但严重的是,你可以编写uniq()的核心,

static long MAXUNIQ=79; //or whatever you want
char*
isdup(char* prev, char* next, long len)
{
    //if( !prev || !next) error
    long n = len<=0 ? MAXUNIQ : len;
    for(  ; *prev==*next && n --> 0; ) { //down-to operator (sic)
        ; //clearly nothing happening here!
    }
    return( (n<1) || !(*p+*n) );
}
/yeah, this is actually strncmp, but hey

你需要一个'strings'数组(char *或char []),让我们读一下,

char* ray[ARRAYMAX]; //define how many elements of your arRay
//could use, char** ray; and malloc(ARRAYMAX*sizeof(char*))

long
read_array(FILE* fh, char* ray[])
{
    char buffer[MAXLINE+1];
    long count=0;
    while( fgets(buffer,sizeof(buffer),fh) ) {
        //you could eat dups here, or in separate function below
        //if( (count<1) && !isdup(ray[count-1],buffer,MAXUNIQ) )
        ray[count++] = strdup(buffer);
    }
    //ray[0] through ray[count-1] contain char*
    //count contains number of strings read
    return count;
}
long
deleteIdents(long raysize, char* ray[]) //de-duplicate
{
    long kept, ndx;
    for( ndx=1, kept=0; ndx<raysize; ++ndx ) {
        if( !isdup(ray[kept],ray[ndx]) ) {
            ray[kept++] = ray[ndx];
        }
        else {
            free(ray[ndx]);
            ray[ndx] = NULL; //not entirely necessary, 
        }
    }
    return kept; //new ray size
}

你需要这个来称呼它......

...
long raysize;
char* ray[ARRAYMAX] = {0}; //init to null pointers
raysize = read_array(fopen(filename,"r"),ray);
raysize = deleteIndents(raysize,ray);
...

稍后,你需要释放malloc'ed字符串,

for( ; 0 <-- raysize; ) { free(ray[raysize]);  ray[raysize] = NULL; }

答案 3 :(得分:0)

以下程序在字符串元素数组上执行您所需的操作。我们用两个指针导航数组,初始化为第一个和第二个元素。我们运行一个array_n - 1比较一个元素到下一个元素的循环,比较两个字符串......如果它们不同,我们将*source_ptr字符串指针复制到*target_ptr个地方。如果它们不同,我们只增加source_ptr,所以它指向下一个数组字符串,但是而不复制它(这使我们有效地删除了指针)我们也在管理新的array(我们使用相同的数组作为源和目标,因为我们只能删除数组元素,因此每次我们在两个指针之间都有一个更大的漏洞)

pru.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* array of strings */    
char *array[] = {
    "Hello World",
    "Hi World",
    "Hello World",
    "Hello World",
    "Hello World",
    "Hi there",
};

size_t array_n = sizeof array / sizeof *array;

int main()
{
    int i;

    char **target_ptr = array, **source_ptr = array + 1;
    size_t new_length = 1;

    for (i = 1; i < array_n; i++) {
        /* if strings pointed to by pointers are equal */
        if (strcmp(*target_ptr, *source_ptr) == 0) {
            /* go to the next, effectively discarding the second pointer */
            source_ptr++;
        } else {
            /* copy both pointers in place, to the destination array */
            *target_ptr++ = *source_ptr++;
            new_length++; /* increment array length */
        }
    }
    /* finally, we have in the array only the good pointers */

    /* print'em */
    for (i = 0; i < new_length; i++)
        printf("%s\n", array[i]);

    exit(0);
}

那就是全部。

样本运行:

$ pru
Hi World
Hello World
Hi there
Hello World
$ _