使用C从数组中删除重复项

时间:2012-03-08 06:56:29

标签: c algorithm

我想就C语言中的数组请教一个小问题。我有如下数组:

int a[11]={1,2,3,4,5,11,11,11,11,16,16};

我想要这样的结果:

{1,2,3,4,5,11,16}

也就是说,我想删除重复项。该怎么实现?

9 个答案:

答案 0 :(得分:9)

在C中无法轻易调整数组的大小——至少像你这样声明的数组不行。显然,如果数据已经排好序,可以直接把不重复的数据复制到数组的前部,并把数组当作较小的尺寸来使用(这是一个线性的O(n)算法)。如果数据没有排序,情况会更麻烦;朴素的算法是二次的(O(N²)),所以先排序(O(N lg N))再使用线性算法可能是最好的办法。

您也可以使用动态分配的内存来管理数组。不过,这可能超出了您目前的学习进度。

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * qsort() comparator for int values.
 * Returns -1, 0 or +1 when the int at pa is less than, equal to or
 * greater than the int at pb.
 */
static int intcmp(const void *pa, const void *pb)
{
    const int lhs = *(const int *)pa;
    const int rhs = *(const int *)pb;

    if (lhs == rhs)
        return 0;
    return (lhs > rhs) ? +1 : -1;
}

/*
 * Collapse every run of equal adjacent values in array[0..size-1] to a
 * single value, in place, and return how many values remain at the front.
 * A size of zero (or less, when assertions are disabled) is returned
 * unchanged.
 */
static int compact(int *array, int size)
{
    assert(size >= 0);
    if (size <= 0)
        return size;

    /* array[0] is always kept, so one value is in place already. */
    int kept = 1;
    for (int scan = 1; scan < size; scan++)
    {
        const int value = array[scan];

        /* Same as the most recently kept value: part of the same run. */
        if (value == array[kept - 1])
            continue;
        array[kept++] = value;
    }
    return kept;
}

/*
 * Print tag on a line of its own, then each of the first size elements of
 * array as "name[index] = value", one per line.
 */
static void print(int *array, int size, const char *tag, const char *name)
{
    int idx = 0;

    printf("%s\n", tag);
    while (idx < size)
    {
        printf("%s[%d] = %d\n", name, idx, array[idx]);
        idx++;
    }
}

/*
 * Demo driver: de-duplicates an already sorted array directly, then an
 * unsorted array by sorting it first, printing each array at every stage.
 */
int main(void)
{
    int a[11] = {1, 2, 3, 4, 5, 11, 11, 11, 11, 16, 16};
    int b[11] = {11, 1, 11, 3, 16, 2, 5, 11, 4, 11, 16};
    int a_size = (int)(sizeof a / sizeof *a);
    int b_size = (int)(sizeof b / sizeof *b);

    /* Sorted input: equal values are already adjacent. */
    print(a, a_size, "Before", "a");
    a_size = compact(a, a_size);
    print(a, a_size, "After", "a");

    /* Unsorted input: sort first so that equal values become adjacent. */
    print(b, b_size, "Before", "b");
    qsort(b, b_size, sizeof *b, intcmp);
    print(b, b_size, "Sorted", "b");
    b_size = compact(b, b_size);
    print(b, b_size, "After", "b");

    return 0;
}

答案 1 :(得分:1)

// Number of elements in the array x. Only valid for real arrays, not for
// pointers. The argument is parenthesized so any expression can be passed.
#define arraysize(x)  (sizeof(x) / sizeof((x)[0])) // put this before main

/*
 * Copies the distinct values of a, in order of first appearance, into b,
 * then optionally into c, an array sized to exactly that many values.
 * The input does not have to be sorted; each value is searched for among
 * the values already stored in b.
 */
int main(void) {
    int a[11] = {1,2,3,4,5,11,11,11,11,16,16}; // doesnt have to be sorted
    int b[11];     // distinct values found so far; only b[0..index-1] are valid
    int index = 0; // number of valid entries in b
    // arraysize() yields an unsigned size; convert once so the loop compares int with int
    const int count = (int)arraysize(a);

    for(int i = 0; i < count; i++) { // looping through the main array
        // plain int flag: bool/true/false would require <stdbool.h> in C
        int duplicate = 0;

        for(int j = 0; j < index; j++) { // only the part of b that holds data; does not loop while b is empty
            if(a[i] == b[j]) { // b already contains the value, no need to look further
                duplicate = 1;
                break;
            }
        }
        if(!duplicate) { // value wasn't found in b: append it at index
            b[index] = a[i];
            index++;
        }
    }

    // optional: copy into an array of exactly the right size.
    // a is non-empty, so index is at least 1 here and the array length is valid.
    int c[index];

    for(int k = 0; k < index; k++) {
        c[k] = b[k];
    }
    (void)c; // the exact-size copy is the end result of this example

    return 0;
}

如果你真的需要,可以创建一个大小正确的新数组并将其复制到其中。

正如你所看到的,C是一种非常基础(但功能强大)的语言;如果可以的话,建议改用向量(例如C++的std::vector)来代替普通数组,它可以根据需要轻松扩容。

不过,只要你处理的只是少量整数,这样做就没有问题。如果数据量很大,可以用malloc()在堆上分配数组,先选择一个较小的尺寸(例如原始数组的一半),然后在添加更多元素时用realloc()扩大它。频繁地重新分配内存也有缺点,这需要你自己权衡:是速度快但分配的内存比实际需要的多,还是速度慢一些但恰好分配所需数量的元素(其实你也无法完全控制这一点,因为在某些情况下malloc()分配的内存可能比请求的多)。

答案 2 :(得分:1)

//gcc -Wall q2.cc -o q2 && q2                                                                             

//Write a program to remove duplicates from a sorted array.                                               

/*                                                                                                        
  The basic idea of our algorithm is to compare 2 adjacent values and determine if they                   
are the same.  If they are not the same and we weren't already looking previously at adjacent pairs
that were the same, then we output the value at the current index.  The algorithm does everything         
in-place and doesn't allocate any new memory.  It outputs the unique values into the input array.         
 */                                                                                                       

#include <stdio.h>                                                                                        
#include <assert.h>                                                                                       

/*
 * Keep only the values of a sorted array that occur exactly once.
 *
 * Walks arr[0..n-1] comparing each element with its successor. A value
 * that belongs to a run of two or more equal neighbours is dropped
 * entirely; every other value is written, in order, to the front of arr.
 *
 * arr: array to filter in place (equal values must be adjacent)
 * n:   number of elements in arr
 *
 * Returns the number of values kept at the front of arr, or -1 when
 * nothing was kept (n <= 0, or every value had a duplicate).
 */
int remove_dups(int *arr, int n)
{
        int idx = 0, odx = -1;
        int dup = 0;    /* non-zero while idx is inside a run of equal values */

        while (idx < n)
        {
                /*
                 * The last element has no successor: treat it as the end of
                 * its run instead of reading arr[n], which lies outside the
                 * array.
                 */
                if (idx + 1 == n || arr[idx] != arr[idx + 1])
                {
                        if (dup)
                                dup = 0;        /* end of a duplicated run: drop it */
                        else
                                arr[++odx] = arr[idx];
                }
                else
                        dup = 1;

                idx++;
        }

        return (odx == -1) ? -1 : ++odx;
}

/*
 * Test driver: runs remove_dups() on three sample arrays, asserts the
 * number of values kept and prints the kept values of each array.
 */
int main(int argc, char *argv[])
{
        int a[] = {31, 44, 44, 67, 67, 99, 99, 100, 101};
        int b[] = {-5, -3, -2, -2, -2, -2, 1, 3, 5, 5, 18, 18};
        int c[] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
        int kept;
        int pos;

        kept = remove_dups(a, 9);
        assert(kept == 3);
        for (pos = 0; pos < kept; ++pos)
                printf("%d ", a[pos]);
        printf("\n\n");

        kept = remove_dups(b, 12);
        assert(kept == 4);
        for (pos = 0; pos < kept; ++pos)
                printf("%d ", b[pos]);
        printf("\n\n");

        kept = remove_dups(c, 9);
        assert(kept == 9);
        for (pos = 0; pos < kept; ++pos)
                printf("%d ", c[pos]);

        return 0;
}

答案 3 :(得分:0)

你应该创建一个新数组,并在插入新元素之前检查该数组是否已经包含要插入的元素。

答案 4 :(得分:0)

问题不明确。但是,如果您尝试删除重复项,则可以使用嵌套的“for”循环并删除所有出现多次的值。

答案 5 :(得分:0)

C没有支持您想要的内置数据类型 - 您需要创建自己的数据类型。

答案 6 :(得分:0)

int a [11] = {1,2,3,4,5,11,11,11,11,16,16};

由于此数组是排序数组,您可以通过以下代码轻松实现。

// Current logical length of the array a; RemoveArrayElementatIndex() shrinks it.
int LengthofArray = 11;

// The first element cannot be a duplicate, so start from i = 1 rather than 0.

// No ';' may follow the for header: with one, the loop body is empty and the
// braces below become a separate block in which i is no longer in scope.
for(int i = 1; i < LengthofArray; i++)
{
   if(a[i] == a[i-1])
      RemoveArrayElementatIndex(i);
}

//function is used to remove the elements in the same as index passed to remove.

/*
 * Removes the element at index i from the global array a by shifting later
 * values to the left, then shrinks the global LengthofArray accordingly.
 *
 * While shifting, a candidate a[k+j] that equals the value currently stored
 * at the write position a[k] is skipped as another duplicate; otherwise it
 * is copied down to a[k]. The comparison uses the not-yet-overwritten value
 * at a[k], so an adjacent duplicate can remain further along the array; the
 * caller's scan over the array is expected to catch those on later
 * iterations.
 *
 * i: index of the element to remove; values <= 0 are ignored, because the
 *    first element is never treated as a duplicate.
 *
 * Declared void: the function returns no value (the original omitted the
 * return type, which is not valid since C99).
 */
void RemoveArrayElementatIndex(int i)
{
   if(i <= 0)
      return;

   int k = i; // write position: next slot to fill
   int j = 1; // offset from k to the next candidate element

   // Move the following items down the array.
   // If i is the last item the loop does not run and only the length is
   // updated, e.g. in case i = 10.
   while((k+j) < LengthofArray)
   {
     if(a[k] == a[k+j])
     {
         // another duplicate: widen the gap, keep the write position
         j = j + 1;
     }
     else
     {
         a[k] = a[k+j];
         // advance the write position; the gap stays the same
         k = k + 1;
     }
   }

   // set the new length of the array
   LengthofArray = k;
}

答案 7 :(得分:0)

您可以使用stdlib.h中的qsort来确保数组按升序排序,从而不再需要嵌套循环。

请注意,qsort需要一个指向函数的指针(在本例中为int_cmp),我已将其包含在下面。

函数int_array_unique“就地”返回无重复的数组,即它会覆盖原始数据,并通过pn指针返回无重复数组的长度。

/* qsort int comparison function; defined below, declared here because
 * int_array_unique uses it first. */
int int_cmp(const void *a, const void *b);

/**
 * Sort an int array in ascending order and remove duplicate values in place.
 *
 * The array is overwritten: after a successful call its first *pn elements
 * hold the distinct values in ascending order, so the original element
 * order is not preserved. Elements beyond the new length are left as they
 * were after sorting.
 *
 * @param array  values to process
 * @param pn     in: number of elements in array; out: number of distinct
 *               values now stored at the front of array
 * @return 0 on success, 1 if array or pn is NULL or the length is zero
 */
int int_array_unique(int *array, size_t *pn)
{
    /* return err code 1 if there is nothing to work on */
    if (array == NULL || pn == NULL || *pn == 0) return 1;

    size_t n = *pn;

    /* sort input array so any duplicate values will be positioned next to
     * each other */
    qsort(array, n, sizeof array[0], int_cmp);

    /* Compact in place: the write position never overtakes the read
     * position, so no temporary buffer (and no allocation that could fail)
     * is needed. array[0] is always kept. */
    size_t c = 1;

    for (size_t i = 1; i < n; i++) {
        /* true if this value differs from the last one kept */
        if (array[i] != array[c - 1]) {
            array[c] = array[i];
            c++;
        }
    }

    /* set return parameter to the number of valid integers, not the
     * allocated array length */
    *pn = c;

    return 0;
}

/**
 * qsort int comparison function.
 *
 * @return negative if *a < *b, zero if they are equal, positive if *a > *b
 */
int int_cmp(const void *a, const void *b)
{
    const int *ia = a;
    const int *ib = b;

    /* Compare instead of subtracting: *ia - *ib overflows for operands that
     * are far apart (e.g. INT_MAX and INT_MIN). */
    return (*ia > *ib) - (*ia < *ib);
}

答案 8 :(得分:0)

用一个简单的条件判断把数组元素存入新数组(只需运行一次,100%可行):I)先将第一个值存入新数组

II)存入其他元素之前,先与已存入的值逐一比较。

III)如果该值已存在,则跳过这个元素,继续检查下一个并存入。

运行下面的代码,你会更容易理解。

/* Defined below; declared here because main calls it first. */
int check(int x, int pos, int b[]);

/*
 * Reads a count n followed by n integers, then prints the distinct values
 * in the order they were first entered.
 *
 * Returns 0 on success, 1 when the count is not a number in the range
 * 0..MAX_VALUES or when a value cannot be read.
 */
int main()
{
    enum { MAX_VALUES = 10 };   /* capacity of both arrays */
    int a[MAX_VALUES], b[MAX_VALUES], i, n, pos = 0;

    printf("\n enter a n value ");
    /* Reject counts that do not fit the arrays: a larger n would write past
     * the end of a and b. */
    if (scanf("%d", &n) != 1 || n < 0 || n > MAX_VALUES)
    {
        printf("\n n must be a number between 0 and %d", MAX_VALUES);
        return 1;
    }

    printf("\n enter a array value");
    for (i = 0; i < n; i++)
    {
        /* stop on malformed input instead of using an unset a[i] */
        if (scanf("%d", &a[i]) != 1)
        {
            printf("\n invalid array value");
            return 1;
        }
    }

    for (i = 0; i < n; i++)
    {
        /* store the value only if b does not hold it yet */
        if (check(a[i], pos, b) == 0)
        {
            b[pos] = a[i];
            pos++;  /* number of values stored in b so far */
        }
    }

    printf("\n after updating array");
    for (i = 0; i < pos; i++)
    {
        printf("\n %d", b[i]);
    }

    return 0;
}
   /*
    * Counts how many of the first pos entries of b are equal to x.
    * Returns that count; 0 means x is not among them.
    */
   int check(int x,int pos,int b[])
   {
       int hits = 0;
       int idx = 0;

       /* only the entries stored so far are examined */
       while (idx < pos)
       {
           hits += (b[idx] == x);
           idx++;
       }
       return hits;
   }