我是C语言的新手,该如何检查一维char数组中是否存在重复项?
例如
/* Number of elements in canvas. */
#define MAX_SIZE 60
/* Array to be checked for duplicates; every element starts out as 0. */
Char canvas[MAX_SIZE] = {0};
/* Visits each index of canvas once; the duplicate check itself is still missing. */
for(int i=0; i<MAX_SIZE;i++){
// How do I check whether there is a duplicate in that array?
}
如何通过迭代来检查重复项?例如,我是否必须使用双重循环,并在其中计算 sizeof(canvas) / SOMETHING?
答案 0 :(得分:3)
我的解决方案,使用函数:
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
/**
 * Reports whether any value occurs more than once among the first len
 * elements of arr.
 *
 * Each element is compared with every element that follows it, so the
 * number of comparisons grows quadratically with len; no extra memory is
 * used.
 *
 * @param arr  elements to inspect; must not be NULL (checked with assert)
 * @param len  number of elements of arr to inspect
 * @return true if at least two inspected elements are equal, false
 *         otherwise (including when len is 0 or 1)
 */
bool mem_hasduplicates(const char arr[], size_t len)
{
    assert(arr != NULL);

    const char *const end = arr + len;
    for (const char *cur = arr; cur != end; ++cur) {
        /* only look forward: earlier pairs were already compared */
        for (const char *later = cur + 1; later != end; ++later) {
            if (*cur == *later) {
                return true;
            }
        }
    }
    return false;
}
/*
 * Demo driver: runs mem_hasduplicates over two sample strings and prints
 * each result in hexadecimal (1 when a duplicate exists, 0 otherwise).
 * The element counts come from sizeof, so each string's terminating '\0'
 * is part of the range that gets checked.
 */
int main() {
    const char canvas[] = "zcxabca";
    const char other_canvas[] = "abcfsd";
    const size_t canvas_len = sizeof canvas / sizeof canvas[0];
    const size_t other_len = sizeof other_canvas / sizeof other_canvas[0];

    const bool canvas_dup = mem_hasduplicates(canvas, canvas_len);
    printf("%x\n", canvas_dup);

    const bool other_dup = mem_hasduplicates(other_canvas, other_len);
    printf("%x\n", other_dup);
}
可通过onlinegdb获得实时版本。
@edit 或者,我们可以按照@selbie的建议,“只需”为所有字符值创建一个直方图,不过这让我的代码变得相当复杂:
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
/* One histogram entry: a character value and how many times it was added. */
struct histogram_value_s {
/* The character this entry counts. */
char value;
/* Number of occurrences recorded for value. */
unsigned int count;
};
/* Growable collection of histogram entries, one per distinct character. */
struct histogram_s {
/* Array of len entries, grown with realloc by histogram_add and released
 * by histogram_fini; NULL in a freshly initialised histogram. */
struct histogram_value_s *v;
/* Number of entries stored in v. */
size_t len;
};
/* Initialiser for an empty histogram: zeroes every member (v is NULL, len is 0). */
#define HISTOGRAM_INIT() {0}
/**
 * Releases the entry array owned by a histogram and resets the histogram
 * to the empty state.
 *
 * After the call t->v is NULL and t->len is 0, so calling histogram_fini
 * a second time on the same object is harmless.
 *
 * @param t  histogram to release; must not be NULL
 */
void histogram_fini(struct histogram_s *t)
{
    free(t->v);
    t->v = NULL; /* no dangling pointer: prevents a double free on reuse */
    t->len = 0;
}
/**
 * qsort comparison callback that orders histogram entries by ascending
 * character value.
 *
 * @param a0  pointer to the first struct histogram_value_s
 * @param b0  pointer to the second struct histogram_value_s
 * @return negative, zero or positive when the first value is respectively
 *         smaller than, equal to or larger than the second
 */
static int histogram_sort_by_value_qsort_cb(const void *a0, const void *b0)
{
    const struct histogram_value_s *a = a0;
    const struct histogram_value_s *b = b0;

    assert(a != NULL);
    assert(b != NULL);

    /* char operands are promoted to int, so the difference cannot overflow */
    const int lhs = a->value;
    const int rhs = b->value;
    return lhs - rhs;
}
/**
 * Sorts the entries of a histogram in place by ascending character value.
 *
 * @param t  histogram to sort; must not be NULL
 */
void histogram_sort_by_value(struct histogram_s *t)
{
    assert(t != NULL);
    /* An empty histogram has v == NULL, which must not be handed to qsort;
     * fewer than two entries are already in order anyway. */
    if (t->len < 2) {
        return;
    }
    qsort(t->v, t->len, sizeof(*t->v), histogram_sort_by_value_qsort_cb);
}
/**
 * qsort comparison callback that orders histogram entries by ascending
 * occurrence count.
 *
 * @param a0  pointer to the first struct histogram_value_s
 * @param b0  pointer to the second struct histogram_value_s
 * @return -1, 0 or 1 when the first count is respectively smaller than,
 *         equal to or larger than the second
 */
static int histogram_sort_by_count_qsort_cb(const void *a0, const void *b0)
{
    const struct histogram_value_s *a = a0;
    const struct histogram_value_s *b = b0;
    assert(a != NULL);
    assert(b != NULL);
    /* Compare instead of subtracting: the unsigned difference wraps around
     * and its conversion to int yields the wrong sign for large gaps. */
    return (a->count > b->count) - (a->count < b->count);
}
/**
 * Sorts the entries of a histogram in place by ascending occurrence count.
 *
 * @param t  histogram to sort; must not be NULL
 */
void histogram_sort_by_count(struct histogram_s *t)
{
    assert(t != NULL);
    /* An empty histogram has v == NULL, which must not be handed to qsort;
     * fewer than two entries are already in order anyway. */
    if (t->len < 2) {
        return;
    }
    qsort(t->v, t->len, sizeof(*t->v), histogram_sort_by_count_qsort_cb);
}
/**
 * Looks up the entry for a character with a linear scan over the entries.
 *
 * @param t      histogram to search
 * @param value  character to look for
 * @param idx    if not NULL, receives the position of the matching entry
 * @param ret0   if not NULL, receives the count stored in the matching entry
 * @return 0 when an entry for value exists, -1 otherwise (the output
 *         parameters are left untouched in that case)
 */
int histogram_getValue_2(const struct histogram_s *t, char value, size_t *idx, unsigned int *ret0)
{
    size_t pos = 0;
    while (pos < t->len && t->v[pos].value != value) {
        ++pos;
    }
    if (pos == t->len) {
        return -1;
    }

    if (ret0 != NULL) {
        *ret0 = t->v[pos].count;
    }
    if (idx != NULL) {
        *idx = pos;
    }
    return 0;
}
void histogram_printlns_generic(const struct histogram_s *t, const char fmt[])
{
assert(t != NULL);
for (size_t i = 0; i < t->len; ++i) {
printf(fmt, t->v[i].value, t->v[i].count);
}
}
/**
 * Records one occurrence of a character in the histogram.
 *
 * If the character already has an entry, its count is incremented;
 * otherwise the entry array is grown by one element and a new entry with
 * count 1 is appended.
 *
 * @param t      histogram to update
 * @param value  character to record
 * @return 0 on success; -1 if the existing count is already UINT_MAX or if
 *         the array could not be grown (the histogram is unchanged then)
 */
int histogram_add(struct histogram_s *t, char value)
{
    size_t found_at;
    const bool known = histogram_getValue_2(t, value, &found_at, NULL) == 0;

    if (known) {
        struct histogram_value_s *entry = &t->v[found_at];
        if (entry->count == UINT_MAX) {
            return -1; /* incrementing would wrap the counter */
        }
        entry->count += 1;
        return 0;
    }

    /* keep t->v intact until realloc is known to have succeeded */
    struct histogram_value_s *grown = realloc(t->v, (t->len + 1) * sizeof(*t->v));
    if (grown == NULL) {
        return -1;
    }
    grown[t->len].value = value;
    grown[t->len].count = 1;
    t->v = grown;
    t->len += 1;
    return 0;
}
/**
 * Reports whether any character was recorded at least twice.
 *
 * Note that, despite the function's name, the threshold is a count of 2 or
 * more, not strictly greater than 2.
 *
 * @param t  histogram to inspect; must not be NULL (checked with assert)
 * @return true if some entry has a count of 2 or more, false otherwise
 */
bool histogram_has_any_count_greater_then_2(const struct histogram_s *t)
{
    assert(t != NULL);

    bool found = false;
    for (size_t i = 0; !found && i < t->len; ++i) {
        found = t->v[i].count > 1;
    }
    return found;
}
/* ----------------------------------------------------------- */
int histogram_create_from_mem(struct histogram_s *ret0, const char arr[], size_t len)
{
assert(ret0 != NULL);
assert(arr != NULL);
struct histogram_s ret = HISTOGRAM_INIT();
for (size_t i = 0; i < len; ++i) {
const char to_add = arr[i];
if (histogram_add(&ret, to_add) < 0) {
goto ERR;
}
}
*ret0 = ret;
return 0;
ERR:
histogram_fini(&ret);
return -1;
}
/*
 * Demo driver: builds a histogram of a sample string (terminating '\0'
 * included, since the length comes from sizeof) and reports whether any
 * character occurs more than once. Returns -1 if the histogram cannot be
 * built.
 */
int main() {
    const char canvas[] = "abc";
    const size_t canvas_len = sizeof canvas / sizeof canvas[0];
    struct histogram_s h;

    if (histogram_create_from_mem(&h, canvas, canvas_len) != 0) {
        fprintf(stderr, "mem_createhistogram error!\n");
        return -1;
    }

    const char *verdict = "does not have";
    if (histogram_has_any_count_greater_then_2(&h)) {
        verdict = "has";
    }
    printf("'%s' %s duplicates\n", canvas, verdict);

    histogram_fini(&h);
}
实时版本here。
@edit或者我们可以对数组进行排序,并检查相邻的两个字节是否相同!
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
/**
 * qsort comparison callback that orders char elements by ascending value.
 *
 * @param a  pointer to the first char
 * @param b  pointer to the second char
 * @return the difference first - second: negative, zero or positive when
 *         the first char is respectively smaller than, equal to or larger
 *         than the second
 */
int cmp_chars(const void *a, const void *b)
{
    /* keep the const qualifier instead of casting it away */
    const char *lhs = a;
    const char *rhs = b;
    /* char operands are promoted to int, so the difference cannot overflow */
    return *lhs - *rhs;
}
/*
 * Demo driver: sorts the characters of a sample string so that equal
 * characters become neighbours, then scans adjacent pairs for a match.
 * The string is sorted in place, so the message prints the sorted text.
 */
int main() {
    char canvas[] = "abca";
    /* sort every character except the terminating '\0' */
    qsort(canvas, sizeof(canvas) - 1, sizeof(canvas[0]), cmp_chars);

    bool duplicate_found = false;
    /* test p[0] before p[1] so an empty string never reads past its terminator */
    for (char *p = canvas; p[0] != '\0' && p[1] != '\0'; ++p) {
        if (p[0] == p[1]) {
            duplicate_found = true;
            break;
        }
    }

    printf("'%s' %s duplicates\n",
           canvas,
           duplicate_found ? "has" : "does not have");
}
可通过onlinegdb获得实时版本。
答案 1 :(得分:0)
如果Char
只是char
的笔误,那么问题就相对简单了——建立第二个以字符代码为索引的数组,用来记录每个字符的出现次数:
#include <limits.h>
#include <ctype.h>
...
// Occurrence counters indexed by character code. Only non-negative codes
// [0..SCHAR_MAX] are tracked (typically [0..127], i.e. standard ASCII).
int charCount[SCHAR_MAX+1] = {0};
...
/**
* This assumes that canvas is *not* a 0-terminated string, and that
* every element of the array is meaningful. If that's not the case,
* then loop on the length of the string instead of MAX_SIZE.
*/
for ( int i = 0; i < MAX_SIZE; i++ )
{
if ( canvas[i] >= 0 && canvas[i] <= SCHAR_MAX )
{
charCount[canvas[i]]++; // index into charCount by the value of canvas[i]
}
}
然后,您可以遍历charCount
数组并打印出现多次的所有字符值:
for ( int i = 0; i <= SCHAR_MAX; i++ )
{
if ( charCount[i] > 1 )
{
/**
* If the character value is a printable character (punctuation, alpha,
* digit), print the character surrounded by single quotes - otherwise,
* print the character code as a decimal integer.
*/
printf( isprint( i ) ? "'%c': %d\n" : "%d: %d\n", i, charCount[i] );
}
}
SCHAR_MAX
到底是怎么回事?为什么我在注释里一再强调非负字符代码?
在C语言中,保证基本执行字符集的字符(数字,大写和小写字母,通用标点字符)具有非负编码(例如,[0..127]
范围为标准ASCII)。该基本执行字符集之外的字符可能具有正值或负值,具体取决于实现方式。因此,char
值的范围在某些平台上可能是[-128..127]
,在其他平台上可能是[0..255]
。
limits.h
标头定义了各种类型范围的常量-对于字符,它定义了以下常量:
UCHAR_MAX - maximum unsigned character value (255 on most platforms)
SCHAR_MIN - minimum signed character value (-128 on most platforms)
SCHAR_MAX - maximum signed character value (127 on most platforms)
CHAR_MIN - minimum character value, either 0 or SCHAR_MIN depending on platform
CHAR_MAX - maximum character value, either UCHAR_MAX or SCHAR_MAX depending on platform
为使此代码保持简单,我只处理[0..127]
范围内的字符代码;否则,我就必须把负的字符代码映射到非负的数组索引上,而我并不想这样做。
此方法和嵌套循环解决方案都需要一些权衡。嵌套循环解决方案以时间换空间,而该解决方案以空间换时间。在这种情况下,无论canvas
有多大,附加空间都是固定的。在嵌套循环的情况下,时间将随着canvas
的长度的平方增加。对于短输入,实际上没有什么区别,但是如果canvas
足够大,则嵌套循环解决方案的性能将大大降低。