INPUT : { 4, 4, 1, 2, 3, 2, 3 }
OUTPUT: { 2, 3, 4 }
算法本身很简单,但目标是让它与 std::unique() 一样高效。我的朴素实现会就地修改容器:
/**
 * Reduces *pv, in place, to the sorted list of values that occur more than
 * once in it, keeping exactly one copy of each such value.
 *
 * Example: { 4, 4, 1, 2, 3, 2, 3 } becomes { 2, 3, 4 }.
 *
 * @param pv vector to modify; a null pointer is ignored.
 *
 * Note: erase() is called once per run of equal values and shifts the tail
 * of the vector each time, so the pass after the sort is not linear.
 */
void not_unique(std::vector<int>* pv)
{
    if (!pv)
        return;

    // Sort (in-place) so we can find duplicates in linear time
    std::sort(pv->begin(), pv->end());

    std::vector<int>::iterator it_start = pv->begin();
    while (it_start != pv->end())
    {
        size_t nKeep = 0;

        // Find the next different element
        std::vector<int>::iterator it_stop = it_start + 1;
        while (it_stop != pv->end() && *it_start == *it_stop)
        {
            nKeep = 1; // This gets set redundantly
            ++it_stop;
        }

        // If the element is a duplicate, keep only the first one (nKeep=1).
        // Otherwise, the element is not duplicated so erase it (nKeep=0).
        it_start = pv->erase(it_start + nKeep, it_stop);
    }
}
答案 0 :(得分:5)
该函数只对区间做一次遍历(具有线性复杂度)。`It` 必须满足前向迭代器(forward iterator)的要求。
/**
 * Compacts a sorted range so that its front holds exactly one copy of every
 * value that occurs more than once; values that occur only once are dropped.
 *
 * The range is scanned run by run (a run is a maximal sequence of equal
 * adjacent elements), so every element is visited once.
 *
 * @tparam It    forward iterator; the value type must support == and
 *               assignment.
 * @param first  start of the sorted range.
 * @param last   end of the sorted range.
 * @return       end of the compacted range; elements from the returned
 *               iterator up to last are leftovers the caller may erase.
 */
template <typename It>
It not_unique(It first, It last)
{
    It new_last = first;

    while (first != last)
    {
        // Locate the end of the run of elements equal to *first.
        It run_end = first;
        ++run_end;
        bool repeated = false;
        while (run_end != last && *first == *run_end)
        {
            repeated = true;
            ++run_end;
        }

        // Keep one representative of the run only if it had >= 2 elements.
        // new_last never passes first, so later runs are not overwritten.
        if (repeated)
        {
            if (new_last != first)
                *new_last = *first;
            ++new_last;
        }

        first = run_end;
    }

    return new_last;
}
答案 1 :(得分:3)
/** Moves duplicates to front, returning end of duplicates range.
 *  Use a sorted range as input.
 *
 *  One copy of every value that occurs more than once is written to the
 *  front of the range, in order.
 *
 *  @param begin start of the sorted range.
 *  @param end   end of the sorted range.
 *  @return      iterator one past the last duplicate written. */
template<class TIter>
TIter Duplicates(TIter begin, TIter end) {
    TIter dup = begin;
    for (TIter it = begin; it != end; ++it) {
        TIter next = it;
        ++next;
        // Compare the range shifted by one element against itself: the
        // second iterator of the result stops on the last element of the
        // run that starts at `it` (or on `it` when the run has length one).
        TIter const miss = std::mismatch(next, end, it).second;
        if (miss != it) {
            *dup++ = *miss;
            it = miss;  // the loop's ++it then moves on to the next run
        }
    }
    return dup;
}
答案 2 :(得分:2)
答案 3 :(得分:2)
这是标准库风格的版本。算法本身归功于 James!(如果你给我 +1,最好也给他 +1,否则……)我所做的只是把它改写成标准库的风格:
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <vector>
// other stuff (not for you)
/**
 * Writes pMsg followed by a newline and one space to std::cout, then every
 * element of pContainer followed by a space, then ends the line with
 * std::endl.
 *
 * @param pMsg       heading printed before the elements.
 * @param pContainer container exposing begin(), end() and value_type.
 */
template <typename T>
void print(const char* pMsg, const T& pContainer)
{
    std::cout << pMsg << "\n ";
    std::copy(pContainer.begin(), pContainer.end(),
              std::ostream_iterator<typename T::value_type>(std::cout, " "));
    std::cout << std::endl;
}
/**
 * Returns a pointer one past the last element of a built-in array.
 *
 * @param pArray reference to an array of N elements of type T.
 * @return       &pArray[0] + N.
 */
template <typename T, size_t N>
T* endof(T (&pArray)[N])
{
    return &pArray[0] + N;
}
// not_unique functions (for you)
/**
 * Compacts a sorted range so that its front holds exactly one copy of every
 * value that occurs more than once, using pPred to decide equality.
 *
 * The range is processed run by run (a run is a maximal sequence of adjacent
 * elements for which pPred holds pairwise); a run contributes its first
 * element to the result only when it contains at least two elements.
 *
 * @param pFirst start of the sorted range.
 * @param pLast  end of the sorted range.
 * @param pPred  binary predicate, called as pPred(later, earlier) on
 *               adjacent elements; returns true when they are equal.
 * @return       end of the compacted range; elements from there up to
 *               pLast are leftovers the caller may erase.
 */
template <typename ForwardIterator, typename BinaryPredicate>
ForwardIterator not_unique(ForwardIterator pFirst, ForwardIterator pLast,
                           BinaryPredicate pPred)
{
    // an empty range falls straight through and returns pFirst == pLast
    ForwardIterator result = pFirst;

    while (pFirst != pLast)
    {
        // walk to the end of the run that starts at pFirst
        ForwardIterator previous = pFirst;
        ForwardIterator current = pFirst;
        bool repeated = false;
        for (++current; current != pLast && pPred(*current, *previous);
             ++current, ++previous)
        {
            repeated = true;
        }

        if (repeated)
        {
            // if it needs to be copied, copy it; result never passes pFirst,
            // so elements of later runs are never overwritten
            if (result != pFirst)
                *result = *pFirst;

            // bump
            ++result;
        }

        pFirst = current;
    }

    return result;
}
/**
 * Convenience overload of not_unique that compares elements with
 * std::equal_to on the iterator's value type.
 *
 * The value type is obtained through std::iterator_traits so that raw
 * pointers (which have no nested value_type) are accepted as iterators.
 *
 * @param pFirst start of the sorted range.
 * @param pLast  end of the sorted range.
 * @return       whatever the three-argument not_unique returns.
 */
template <typename ForwardIterator>
ForwardIterator not_unique(ForwardIterator pFirst, ForwardIterator pLast)
{
    typedef typename std::iterator_traits<ForwardIterator>::value_type
        value_type;

    return not_unique(pFirst, pLast, std::equal_to<value_type>());
}
int main()
typedef std::vector<int> vec;
int data[] = {1, 4, 7, 7, 2, 2, 2, 3, 9, 9, 5, 4, 2, 8};
vec v(data, endof(data));
// precondition
std::sort(v.begin(), v.end());
print("before", v);
// duplicatify (it's a word now)
vec::iterator iter = not_unique(v.begin(), v.end());
print("after", v);
// remove extra
v.erase(iter, v.end());
print("erased", v);
答案 4 :(得分:2)
#include <algorithm>
#include <functional>
/**
 * Compacts a sorted range so that its front holds one copy of every value
 * that has at least one adjacent equal neighbour; unique values are dropped.
 *
 * @param first start of the sorted range.
 * @param last  end of the sorted range.
 * @param pred  binary equality predicate, called as pred(earlier, later);
 *              defaults to a value-initialised P.
 * @return      end of the compacted range.
 *
 * The remainder of each run is skipped with a plain loop, so pred does not
 * have to be an adaptable functor (plain functions and closures work too).
 */
template< class I, class P >
I remove_unique( I first, I last, P pred = P() ) {
    I dest = first;
    while ( ( first = std::adjacent_find( first, last, pred ) ) != last ) {
        // first now points at the start of a run of two or more equal
        // elements: keep one copy of it
        * dest = * first;
        ++ dest;

        // skip the rest of the run; dest never passes first, so the
        // elements read here have not been overwritten
        I prev = first;
        ++ first;
        while ( first != last && pred( * prev, * first ) ) {
            prev = first;
            ++ first;
        }
    }
    return dest;
}
/**
 * Convenience overload of remove_unique that compares elements with
 * std::equal_to on the iterator's value type.
 *
 * @param first start of the sorted range.
 * @param last  end of the sorted range.
 * @return      whatever the three-argument remove_unique returns.
 */
template< class I >
I remove_unique( I first, I last ) {
    return remove_unique( first, last,
        std::equal_to< typename std::iterator_traits<I>::value_type >() );
}
答案 5 :(得分:1)
我认为从大 O 复杂度的角度来看,你的实现已经足够好了。开销最大的是排序,为 O(N log N)。不过,还有一种做法是把重复的元素收集到一个新向量中,而不是在现有向量上通过删除操作去掉非重复项。但是,只有当不同的重复值的数量相对于条目总数较小时,这种做法才会更好。
编辑我只是在考虑“面试”问题。换句话说,这不是一个非常有用的答案。但是有可能在O(N)(线性时间)而不是O(N Log N)中解决这个问题。使用存储空间而不是CPU。创建两个“位”数组,最初清除它们。循环遍历整数值向量。查找第一个数组中的每个值。如果未设置,则设置该位(将其设置为1)。如果已设置,则在第二个数组中设置相应的位(表示重复)。处理完所有向量条目后,扫描第二个数组并输出重复的整数(由第二个位数组中设置的位表示)。使用位阵列的原因仅仅是为了提高空间效率。如果处理4字节整数,则所需的原始空间为(2 * 2^32 / 8 )
clear/zero bitarrays
// NOTE - do not need to sort the input
foreach value in original vector {
if ( bitarray1[value] )
// duplicate
bitarray2[value] = 1
bitarray1[value] = 1
// At this point, bitarray2 contains a 1 for all duplicate values.
// Scan it and create the new vector with the answer
for i = 0 to maxvalue
if ( bitarray2[i] )
print/save/keep i
答案 6 :(得分:1)
在 while 循环内部调用 `erase(it_start + nKeep, it_stop);` 会导致其余元素被一遍又一遍地复制。
/**
 * Counts how many consecutive elements, starting at curr, are equal to the
 * element curr points at.
 *
 * @param curr start of the range to inspect.
 * @param end  end of the range.
 * @return     length of the leading run of equal values; 0 when curr == end.
 */
int num_repeats(std::vector<int>::const_iterator curr,
                std::vector<int>::const_iterator end) {
  // an empty range has nothing to dereference
  if (curr == end) {
    return 0;
  }

  const int same = *curr;
  int count = 0;
  while (curr != end && same == *curr) {
    ++curr;
    ++count;
  }
  return count;
}
void dups(vector<int> *v) {
sort(v->begin(), v->end());
vector<int>::iterator current = v->begin();
vector<int>::iterator end_of_dups = v->begin();
while (current != v->end()) {
int n = num_repeats(current, v->end());
if (n > 1) {
swap(*end_of_dups, *current);
current += n;
v->erase(end_of_dups, v->end());
答案 7 :(得分:1)
/**
 * Replaces the contents of v with one copy of every value that occurs more
 * than once in it, in ascending order. The input does not need to be sorted.
 *
 * @tparam T element type; must be usable as a std::set key (operator<).
 * @param v  vector to rewrite.
 */
template <typename T>
void keep_duplicates(std::vector<T>& v)
{
    std::set<T> seen;        // every distinct value met so far
    std::set<T> duplicates;  // values met at least twice

    for (typename std::vector<T>::const_iterator it = v.begin();
         it != v.end(); ++it)
    {
        // insert() reports through .second whether the value was new
        if (!seen.insert(*it).second)
            duplicates.insert(*it);
    }

    v.assign(duplicates.begin(), duplicates.end());
}
答案 8 :(得分:0)
这修复了 James McNellis 原始版本中的错误。我还同时提供了就地(in-place)和非就地(out-of-place)两个版本。
// In-place version. Uses less memory and works for more container
// types but is slower.
//
// Compacts a sorted range so that its front holds one copy of every value
// that occurs more than once, and returns the end of that compacted part.
// Only increment, dereference and comparison are used on the iterators, so
// forward iterators are sufficient.
template <typename It>
It not_unique_inplace(It first, It last)
{
    if (first == last)
        return last;

    It new_last = first;
    It last_kept = first;  // last element written; valid once new_last != first

    It current = first;
    It next = first;
    for (++next; next != last; ++current, ++next)
    {
        // keep *current when it starts an equal pair whose value has not
        // already been written to the output
        if (*current == *next &&
            (new_last == first || *current != *last_kept))
        {
            last_kept = new_last;
            *new_last = *current;
            ++new_last;
        }
    }
    return new_last;
}
// Out-of-place version. Fastest.
//
// Appends to *pout one copy of every value that occurs more than once in
// the sorted range [first, last). The input range is not modified.
// pout is a pointer-like handle to a container providing empty(), back()
// and push_back(); nothing happens when the range is empty or pout is null.
template <typename It, typename Container>
void not_unique(It first, It last, Container pout)
{
    if (first == last || !pout)
        return;

    It current = first;
    It next = first;
    for (++next; next != last; ++current, ++next)
    {
        // record *current when it starts an equal pair whose value is not
        // already the last one appended
        if (*current == *next &&
            (pout->empty() || *current != pout->back()))
        {
            pout->push_back(*current);
        }
    }
}
答案 9 :(得分:0)
“与std :: unique一样高效”是什么意思?在运行时,开发时间,内存使用或什么方面有效?
正如其他人所指出的那样,std :: unique需要你没有提供的排序输入,所以开始时这不是一个公平的测试。
就我个人而言,我只需要一个std :: map就可以完成我的所有工作。它具有许多我们可以用来最大限度优雅/简洁的属性。它保持元素已经排序,如果键不存在,operator []将插入一个零值。通过利用这些属性,我们可以在两行或三行代码中完成此操作,并且仍然可以实现合理的运行时复杂性。
#include <iostream>
#include <vector>
#include <map>
/**
 * Prints to std::cout, in ascending order and separated by spaces, every
 * value that occurs at least twice in *v, followed by a newline.
 *
 * @param v vector to inspect; it is not modified.
 */
void
output_sorted_duplicates(std::vector<int>* v)
{
   std::map<int, int> m;

   // count how many of each element there are, putting results into map
   // map keys are elements in the vector,
   // map values are the frequency of that element
   for (std::vector<int>::iterator vb = v->begin(); vb != v->end(); ++vb)
      ++m[*vb];

   // output keys whose values are 2 or more
   // the keys are already sorted by the map
   for (std::map<int, int>::iterator mb = m.begin(); mb != m.end(); ++mb)
      if ( (*mb).second >= 2 )
         std::cout << (*mb).first << " ";
   std::cout << std::endl;
}
int main(void)
int initializer[] = { 4, 4, 1, 2, 3, 2, 3 };
std::vector<int> data(&initializer[0], &initializer[0] + 7);
janks@phoenix:/tmp$ g++ test.cc && ./a.out
2 3 4