Question

我是C语言的新手，我该如何检查一维char数组中是否存在重复项？

例如

#define MAX_SIZE 60
Char canvas[MAX_SIZE] = {0};
for(int i=0; i<MAX_SIZE;i++){
   // How do I check whether that array contains a duplicate?
}

我该如何通过迭代来检查重复项？例如，我是否必须使用双重for循环，并在其中计算sizeof(canvas) / SOMETHING？

Answer 1

我的解决方案，使用函数：

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Report whether any byte value occurs more than once in arr[0..len).
 *
 * Every element is compared against all elements that follow it, so the
 * running time grows with the square of len.  An empty or one-element
 * array never contains a duplicate.
 */
bool mem_hasduplicates(const char arr[], size_t len)
{
    assert(arr != NULL);
    for (size_t first = 0; first + 1 < len; ++first) {
        const char needle = arr[first];
        for (size_t second = first + 1; second < len; ++second) {
            if (arr[second] == needle)
                return true;
        }
    }
    return false;
}

/* Demo: for each sample array, print 1 if it holds a repeated byte, else 0. */
int main() {
    const char canvas[] = "zcxabca";
    const char other_canvas[] = "abcfsd";

    /* Element counts; each one includes the string's terminating '\0'. */
    const size_t canvas_len = sizeof(canvas) / sizeof(canvas[0]);
    const size_t other_len = sizeof(other_canvas) / sizeof(other_canvas[0]);

    printf("%x\n", mem_hasduplicates(canvas, canvas_len));
    printf("%x\n", mem_hasduplicates(other_canvas, other_len));
}

可通过onlinegdb获得实时版本。

@edit 或者我们可以按照@selbie的建议，“仅仅”根据所有数值创建一个直方图，尽管这样一来我的实现很快就变得相当复杂：

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>

/* One histogram bucket: a character value and how many times it was added. */
struct histogram_value_s {
    char value;          /* the character this bucket counts */
    unsigned int count;  /* number of occurrences recorded so far */
};

/* A histogram: a dynamically allocated array of buckets plus its length. */
struct histogram_s {
    struct histogram_value_s *v;  /* bucket array; grown with realloc(), released by histogram_fini() */
    size_t len;                   /* number of buckets stored in v */
};


/* Initializer for an empty histogram: all members are zero-initialized. */
#define HISTOGRAM_INIT()  {0}

/*
 * Release the bucket array owned by *t and reset *t to the empty state.
 *
 * The pointer is cleared after free() so that calling histogram_fini() a
 * second time, or adding to the histogram again afterwards, is harmless
 * instead of a double free / use-after-free.
 */
void histogram_fini(struct histogram_s *t)
{
    assert(t != NULL);
    free(t->v);
    t->v = NULL;
    t->len = 0;
}

/*
 * qsort() comparator: orders buckets by ascending character value.
 * Both values are promoted to int before subtracting, so the difference
 * cannot overflow.
 */
static int histogram_sort_by_value_qsort_cb(const void *a0, const void *b0)
{
    const struct histogram_value_s *lhs = a0;
    const struct histogram_value_s *rhs = b0;
    assert(lhs != NULL);
    assert(rhs != NULL);

    const int left = lhs->value;
    const int right = rhs->value;
    return left - right;
}

void histogram_sort_by_value(struct histogram_s *t)
{
    qsort(t->v, t->len, sizeof(*t->v), histogram_sort_by_value_qsort_cb);
}

/*
 * qsort() comparator: orders buckets by ascending occurrence count.
 *
 * Returns a negative, zero or positive value when a0's count is less than,
 * equal to or greater than b0's count.  The counts are unsigned, so they are
 * compared instead of subtracted: an unsigned difference wraps around, its
 * conversion to int is implementation-defined, and for differences larger
 * than INT_MAX the resulting sign is wrong.
 */
static int histogram_sort_by_count_qsort_cb(const void *a0, const void *b0)
{
    const struct histogram_value_s *a = a0;
    const struct histogram_value_s *b = b0;
    assert(a != NULL);
    assert(b != NULL);
    return (a->count > b->count) - (a->count < b->count);
}

void histogram_sort_by_count(struct histogram_s *t)
{
    qsort(t->v, t->len, sizeof(*t->v), histogram_sort_by_count_qsort_cb);
}


int histogram_getValue_2(const struct histogram_s *t, char value, size_t *idx, unsigned int *ret0)
{
    for (size_t i = 0; i < t->len; ++i) {
        if (t->v[i].value == value) {
            if (ret0) {
                *ret0 = t->v[i].count;
            }
            if (idx) {
                *idx = i;
            }
            return 0;
        }
    }
    return -1;
}

void histogram_printlns_generic(const struct histogram_s *t, const char fmt[])
{
    assert(t != NULL);
    for (size_t i = 0; i < t->len; ++i) {
        printf(fmt, t->v[i].value, t->v[i].count);
    }
}

/*
 * Record one occurrence of `value`.
 *
 * If a bucket for `value` already exists its count is incremented; otherwise
 * the bucket array is grown by one element and a new bucket with count 1 is
 * appended.  Returns 0 on success, or -1 when the count is already UINT_MAX
 * or the reallocation fails; the histogram is left unchanged in both cases.
 */
int histogram_add(struct histogram_s *t, char value)
{
    size_t found_at;

    if (histogram_getValue_2(t, value, &found_at, NULL) != 0) {
        /* First occurrence: append a fresh bucket. */
        struct histogram_value_s *grown = realloc(t->v, (t->len + 1) * sizeof(*t->v));
        if (grown == NULL)
            return -1;

        t->v = grown;
        t->v[t->len].value = value;
        t->v[t->len].count = 1;
        t->len += 1;
        return 0;
    }

    /* Known value: bump its count unless that would overflow. */
    if (t->v[found_at].count == UINT_MAX)
        return -1;

    t->v[found_at].count += 1;
    return 0;
}

/*
 * Report whether any bucket has a count of at least two, i.e. whether some
 * value was added more than once.  Note that, despite the function's name,
 * the threshold is "count >= 2", not "count > 2".
 */
bool histogram_has_any_count_greater_then_2(const struct histogram_s *t)
{
    assert(t != NULL);

    bool found = false;
    for (size_t pos = 0; pos < t->len && !found; ++pos) {
        found = t->v[pos].count >= 2;
    }
    return found;
}

/* ----------------------------------------------------------- */

int histogram_create_from_mem(struct histogram_s *ret0, const char arr[], size_t len)
{
    assert(ret0 != NULL);
    assert(arr != NULL);

    struct histogram_s ret = HISTOGRAM_INIT();

    for (size_t i = 0; i < len; ++i) {

        const char to_add = arr[i];

        if (histogram_add(&ret, to_add) < 0) {
            goto ERR;
        }
    }

    *ret0 = ret;
    return 0;
ERR:
    histogram_fini(&ret);
    return -1;
}

/*
 * Demo: build a histogram of `canvas` and report whether any value repeats.
 * Exits with EXIT_FAILURE when the histogram cannot be built.
 */
int main(void) {
    const char canvas[] = "abc";

    struct histogram_s h;
    /* The element count includes the terminating '\0'.  For a string without
     * embedded NULs it occurs exactly once, so it cannot cause a false
     * duplicate. */
    const int ret = histogram_create_from_mem(&h, canvas, sizeof(canvas)/sizeof(canvas[0]));
    if (ret != 0) {
        /* Name the function that actually failed. */
        fprintf(stderr, "histogram_create_from_mem error!\n");
        return EXIT_FAILURE;
    }

    printf("'%s' %s duplicates\n",
        canvas,
        histogram_has_any_count_greater_then_2(&h)
            ? "has"
            : "does not have"
    );

    histogram_fini(&h);
    return EXIT_SUCCESS;
}

可在此处获得实时版本。

@edit或者我们可以对数组进行排序，并检查相邻的两个字节是否相同！

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * qsort() comparator for char elements: returns a negative, zero or positive
 * value when *a is less than, equal to or greater than *b.
 *
 * The elements are read through pointers to const char, so the const
 * qualifier of the arguments is no longer cast away.  Both operands are
 * promoted to int before the subtraction, so it cannot overflow.
 */
int cmp_chars(const void *a, const void *b)
{
    const char *lhs = a;
    const char *rhs = b;
    return *lhs - *rhs;
}

/*
 * Demo: sort the characters of `canvas`, then look for two equal neighbours.
 * After sorting, equal characters are adjacent, so a single linear pass
 * finds any duplicate.
 */
int main() {
    char canvas[] = "abca";
    /* Number of characters, excluding the terminating '\0'. */
    const size_t len = sizeof(canvas) - 1;

    qsort(canvas, len, sizeof(canvas[0]), cmp_chars);

    bool duplicate_found = false;
    /* Bound the scan by the length rather than by p[1] != '\0': for an empty
     * string that test would read canvas[1], which lies past the array. */
    for (size_t i = 0; i + 1 < len; ++i) {
        if (canvas[i] == canvas[i + 1]) {
            duplicate_found = true;
            break;
        }
    }

    /* canvas was sorted in place, so the sorted characters are printed. */
    printf("'%s' %s duplicates\n",
        canvas,
        duplicate_found ? "has" : "does not have");
}

可通过onlinegdb获得实时版本。

Answer 2

如果Char只是char的错字，那么问题就变得相对简单——设置第二个以字符代码为索引的数组，用来跟踪每个字符的出现次数：

#include <limits.h>
#include <ctype.h>
...
// Occurrence counters, indexed by character code and zero-initialized.
int charCount[SCHAR_MAX+1] = {0}; // We're only going to worry about non-negative
                                  // character codes (i.e., standard ASCII)
                                  // [0..127]
...
/**
 * Count how often each character code occurs in canvas.
 *
 * This assumes that canvas is *not* a 0-terminated string, and that
 * every element of the array is meaningful.  If that's not the case,
 * then loop on the length of the string instead of MAX_SIZE.
 */
for ( int i = 0; i < MAX_SIZE; i++ )
{
  // Skip codes outside [0..SCHAR_MAX]: a negative or larger value would
  // index outside charCount.
  if ( canvas[i] >= 0 && canvas[i] <= SCHAR_MAX )
  {
    charCount[canvas[i]]++;  // index into charCount by the value of canvas[i]
  }
}

然后，您可以遍历charCount数组并打印出现多次的所有字符值：

// Report every character code that was counted more than once.
for ( int i = 0; i <= SCHAR_MAX; i++ )
{
  if ( charCount[i] > 1 )
  {
    /**
     * If the character value is a printable character (punctuation, alpha,
     * digit), print the character surrounded by single quotes - otherwise,
     * print the character code as a decimal integer.
     */
    printf( isprint( i ) ? "'%c': %d\n" : "%d: %d\n", i, charCount[i] );
  }
}

SCHAR_MAX是怎么回事？为什么我要在注释中反复强调非负字符代码？

在C语言中，保证基本执行字符集的字符（数字，大写和小写字母，通用标点字符）具有非负编码（例如，[0..127]范围为标准ASCII）。该基本执行字符集之外的字符可能具有正值或负值，具体取决于实现方式。因此，char值的范围在某些平台上可能是[-128..127]，在其他平台上可能是[0..255]。

limits.h标头定义了各种类型范围的常量-对于字符，它定义了以下常量：

UCHAR_MAX - maximum unsigned character value (255 on most platforms)
SCHAR_MIN - minimum signed character value (-128 on most platforms)
SCHAR_MAX - maximum signed character value (127 on most platforms)
CHAR_MIN  - minimum character value, either 0 or SCHAR_MIN depending on platform
CHAR_MAX  - maximum character value, either UCHAR_MAX or SCHAR_MAX depending on platform

为使此代码保持简单，我只关心[0..127]范围内的字符代码；否则，我将不得不把负的字符代码映射到非负的数组索引上，而我并不想这样做。

此方法和嵌套循环解决方案都需要一些权衡。嵌套循环解决方案以时间换空间，而该解决方案以空间换时间。在这种情况下，无论canvas有多大，附加空间都是固定的。在嵌套循环的情况下，时间将随着canvas的长度的平方增加。对于短输入，实际上没有什么区别，但是如果canvas足够大，则嵌套循环解决方案的性能将大大降低。

如何检查字符数组c中的重复项

2 个答案: