我正在寻找创建功能的帮助。
函数deleteIdents()将删除char数组中的相同行,因为它们是连续的。它将保留一条完全相同的行。
我不需要检查整条线是否相同。只有前79个字符MAXCHARS才适合这种情况。
因此,例如,如果我的数组包含
from itertools import cycle
abc = ['a', 'b', 'c', 'd', 'e' ]
alfs = ''
for n, e in enumerate(cycle(abc)): # lazy enumeration?
alfs += e
if n >= 18: # must have stopping test to break infinite loop
break
alfs
Out[30]: 'abcdeabcdeabcdeabcd'
它将被更改为
Hello World
Hi World
Hello World
Hello World
Hello World
Hi there
在我的脑海中,该功能看起来类似于:
Hello World
Hi World
Hello World
Hi there
但我不确定。如果您有解决方案,我会很高兴并感谢您听到它:)
答案 0 :(得分:1)
读取第一行而不是第二行,如果它们相等则比较它们进入循环直到它们不相等。所以这是代码:
char *first_line = malloc(MAXLINE);
char *second_line = malloc(MAXLINE);
getline(first_line);
do {
getline(second_line);
} while (strcmp (first_line, second_line));
对于getline()
实施搜索,有很多例子。或here你有我的。
答案 1 :(得分:1)
另一个例子如何实现。想法是保持2个指针,并且只有在条目不同时才增加第一个指针。还分配了一些额外的存储空间以避免已被覆盖的条目的内存泄漏。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int unique(char **strings, int size) {
if (!strings) {
return -1;
}
int head = 0, newHead = 0, duplicatedElementsHead = 0;
//Save duplicates to avoid memory leaks
char** duplicatedEntries = malloc(size*sizeof(char*));
while (head < size) {
//String are the same
if (!strcmp(strings[head], strings[newHead])) {
if (head != newHead) {
duplicatedEntries[duplicatedElementsHead++] = strings[newHead];
}
++head;
} else {
strings[++newHead] = strings[head++];
}
}
//Put duplicated entries after new end
int idx = 0, tmpHead = newHead + 1;
for (; idx < duplicatedElementsHead; ++idx) {
strings[tmpHead++] = duplicatedEntries[idx];
}
free(duplicatedEntries);
return newHead;
}
int main() {
char **strings = malloc(8*sizeof(char*));
strings[0] = "Hello World";
strings[1] = "Hi World";
strings[2] = "Hi World";
strings[3] = "Hello World";
strings[4] = "Hello World";
strings[5] = "Hi there";
strings[6] = "Hia";
strings[7] = "Hi";
int newEnd = unique(strings, 8);
for (int i=0; i < newEnd; ++i) {
printf("%s\n", strings[i]);
}
free(strings);
}
答案 2 :(得分:0)
您实际上是在编写unix / linux实用程序的核心功能,'uniq'。
cat filename | sort | uniq > newfile
#or skip sort, since you didn't mention
cat filename | uniq > newfile
你可以使用popen和uniq(类似这样......)
FILE *uniqfh;
uniqfh = popen("cat file1 | uniq" , "r");
if (uniqfh == NULL) { //handle error
}
while( fgets(uniqfh, buffer, buffersize) ) printf("%s\n",buffer);
但严重的是,你可以编写uniq()的核心,
static long MAXUNIQ=79; //or whatever you want
char*
isdup(char* prev, char* next, long len)
{
//if( !prev || !next) error
long n = len<=0 ? MAXUNIQ : len;
for( ; *prev==*next && n --> 0; ) { //down-to operator (sic)
; //clearly nothing happening here!
}
return( (n<1) || !(*p+*n) );
}
/yeah, this is actually strncmp, but hey
你需要一个'strings'数组(char *或char []),让我们读一下,
char* ray[ARRAYMAX]; //define how many elements of your arRay
//could use, char** ray; and malloc(ARRAYMAX*sizeof(char*))
long
read_array(FILE* fh, char* ray[])
{
char buffer[MAXLINE+1];
long count=0;
while( fgets(buffer,sizeof(buffer),fh) ) {
//you could eat dups here, or in separate function below
//if( (count<1) && !isdup(ray[count-1],buffer,MAXUNIQ) )
ray[count++] = strdup(buffer);
}
//ray[0] through ray[count-1] contain char*
//count contains number of strings read
return count;
}
long
deleteIdents(long raysize, char* ray[]) //de-duplicate
{
long kept, ndx;
for( ndx=1, kept=0; ndx<raysize; ++ndx ) {
if( !isdup(ray[kept],ray[ndx]) ) {
ray[kept++] = ray[ndx];
}
else {
free(ray[ndx]);
ray[ndx] = NULL; //not entirely necessary,
}
}
return kept; //new ray size
}
你需要这个来称呼它......
...
long raysize;
char* ray[ARRAYMAX] = {0}; //init to null pointers
raysize = read_array(fopen(filename,"r"),ray);
raysize = deleteIndents(raysize,ray);
...
稍后,你需要释放malloc'ed字符串,
for( ; 0 <-- raysize; ) { free(ray[raysize]); ray[raysize] = NULL; }
答案 3 :(得分:0)
以下程序在字符串元素数组上执行您所需的操作。我们用两个指针导航数组,初始化为第一个和第二个元素。我们运行一个array_n - 1
比较一个元素到下一个元素的循环,比较两个字符串......如果它们不同,我们将*source_ptr
字符串指针复制到*target_ptr
个地方。如果它们不同,我们只增加source_ptr
,所以它指向下一个数组字符串,但是而不复制它(这使我们有效地删除了指针)我们也在管理新的array(我们使用相同的数组作为源和目标,因为我们只能删除数组元素,因此每次我们在两个指针之间都有一个更大的漏洞)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* array of strings */
char *array[] = {
"Hello World",
"Hi World",
"Hello World",
"Hello World",
"Hello World",
"Hi there",
};
size_t array_n = sizeof array / sizeof *array;
int main()
{
int i;
char **target_ptr = array, **source_ptr = array + 1;
size_t new_length = 1;
for (i = 1; i < array_n; i++) {
/* if strings pointed to by pointers are equal */
if (strcmp(*target_ptr, *source_ptr) == 0) {
/* go to the next, effectively discarding the second pointer */
source_ptr++;
} else {
/* copy both pointers in place, to the destination array */
*target_ptr++ = *source_ptr++;
new_length++; /* increment array length */
}
}
/* finally, we have in the array only the good pointers */
/* print'em */
for (i = 0; i < new_length; i++)
printf("%s\n", array[i]);
exit(0);
}
那就是全部。
$ pru
Hi World
Hello World
Hi there
Hello World
$ _