Question

我想复制一个向量，同时将每个元素的某个属性设置为零。我有一个 std::vector<PLY>，其中包含若干个如下结构体的元素：

// A point sample made of three float coordinates.
struct PLY{
    float x;
    float y;
    float z;
};

创建此向量的副本、并使副本中每个 PLY 元素的 z 值为 0，最快的方法是什么？是否有比“先创建向量的副本，再遍历每个元素设置新的 z 值”更快的方法？

Answer 1

您可以使用std::transform：

// Destination vector; capacity is reserved up front so that appending
// through back_inserter never has to reallocate.
std::vector<PLY> zeroed{};
zeroed.reserve(other_vec.size());

// Each element is taken by value, its z is cleared, and the modified copy
// is appended to `zeroed`.
std::transform(other_vec.begin(), other_vec.end(), std::back_inserter(zeroed),
               [](auto point) {
                   point.z = 0.f;
                   return point;
               });

Answer 2

最快的方式是什么？？

第一个回答

测试一下。内存架构做了令人惊讶的事情。

#include <iostream>
#include <chrono>
#include <vector>
#include <iomanip>
#include <algorithm>

// Point sample with three float coordinates.
struct PLY
{
    float x;
    float y;
    float z;

    // Default construction yields the point (0, 0, 0).
    PLY() : PLY(0, 0, 0) {}

    // Stores the three coordinates exactly as given.
    PLY(float x, float y, float z) : x(x), y(y), z(z) {}
};



/// Times one copy strategy and prints a short report to std::cout.
///
/// @tparam F       Callable invoked exactly once as `f(result, samples)`.
/// @param name     Label printed on the "testing ..." header line.
/// @param samples  Input elements; taken by value, so this function owns them.
/// @param f        Strategy under test; it receives the (empty, pre-reserved)
///                 result vector and the samples.
/// @return         The vector that was handed to `f` as its first argument.
///
/// Only the call to `f` is timed. Reserving capacity for the result happens
/// before the clock is started.
template<class F>
std::vector<PLY> test(const char* name, std::vector<PLY> samples, F f)
{
    using namespace std::literals;

    // steady_clock is monotonic, which is what interval measurement needs;
    // high_resolution_clock may alias a wall clock that can be adjusted.
    using bench_clock = std::chrono::steady_clock;
    using fns = std::chrono::duration<long double, std::chrono::nanoseconds::period>;
    using fms = std::chrono::duration<long double, std::chrono::milliseconds::period>;

    std::vector<PLY> result;
    result.reserve(samples.size());

    const auto start = bench_clock::now();
    f(result, samples);
    const auto end = bench_clock::now();

    // Work in floating-point nanoseconds so the per-sample figure keeps its
    // fractional part.
    const auto interval = fns(end - start);
    const auto time_per_sample = interval / samples.size();
    const auto samples_per_second = 1s / time_per_sample;

    std::cout << "testing " << name << '\n';
    std::cout << " sample size        : " << samples.size() << '\n';
    std::cout << " time taken         : " << std::fixed << fms(interval).count() << "ms\n";
    std::cout << " time per sample    : " << std::fixed << time_per_sample.count() << "ns\n";
    std::cout << " samples per second : " << std::fixed << samples_per_second << "\n";

    return result;
}

// Iterator adaptor over std::vector<PLY>::const_iterator whose dereference
// yields a copy of the current element with z replaced by zero.
// Traversal and comparison come from the inherited base iterator.
struct zero_z_iterator : std::vector<PLY>::const_iterator
{
    using base_class = std::vector<PLY>::const_iterator;
    using value_type = PLY;

    // Reuse every constructor of the underlying iterator.
    using base_class::base_class;

    // Returns by value: the source element itself is never modified.
    value_type operator*() const {
        const PLY& original = base_class::operator*();
        return PLY{ original.x, original.y, 0.0f };
    }
};

int main()
{

    test("transform", std::vector<PLY>(1000000), [](auto& target, auto& source)
         {
             std::transform(source.begin(), source.end(),
                            std::back_inserter(target),
                            [](auto& ply) {
                                return PLY { ply.x, ply.y, ply.z };
                            });
         });

    test("copy and reset z", std::vector<PLY>(1000000), [](auto& target, auto& source)
         {
             std::copy(source.begin(), source.end(),
                       std::back_inserter(target));
             for (auto& x : target)
             {
                 x.z = 0;
             }
         });

    test("hand_roll", std::vector<PLY>(1000000), [](auto& target, auto& source)
         {
             for(auto& x : source) {
                 target.emplace_back(x.x, x.y, 0.0);
             }
         });

    test("assign through custom iterator", std::vector<PLY>(1000000), [](auto& target, auto& source)
         {
             target.assign(zero_z_iterator(source.begin()),
                                           zero_z_iterator(source.end()));
         });


    test("transform", std::vector<PLY>(100000000), [](auto& target, auto& source)
         {
             std::transform(source.begin(), source.end(),
                            std::back_inserter(target),
                            [](auto& ply) {
                                return PLY { ply.x, ply.y, ply.z };
                            });
         });

    test("copy and reset z", std::vector<PLY>(100000000), [](auto& target, auto& source)
         {
             std::copy(source.begin(), source.end(),
                       std::back_inserter(target));
             for (auto& x : target)
             {
                 x.z = 0;
             }
         });

    test("hand_roll", std::vector<PLY>(100000000), [](auto& target, auto& source)
         {
             for(auto& x : source) {
                 target.emplace_back(x.x, x.y, 0.0);
             }
         });

    test("assign through custom iterator", std::vector<PLY>(100000000), [](auto& target, auto& source)
         {
             target.assign(zero_z_iterator(source.begin()),
                           zero_z_iterator(source.end()));
         });
}

样本结果

testing transform
 sample size        : 1000000
 time taken         : 7.495685ms
 time per sample    : 7.495685ns
 samples per second : 133410088.604310
testing copy and reset z
 sample size        : 1000000
 time taken         : 3.436614ms
 time per sample    : 3.436614ns
 samples per second : 290984090.735823
testing hand_roll
 sample size        : 1000000
 time taken         : 3.289287ms
 time per sample    : 3.289287ns
 samples per second : 304017253.587176
testing assign through custom iterator
 sample size        : 1000000
 time taken         : 2.563334ms
 time per sample    : 2.563334ns
 samples per second : 390116933.649692
testing transform
 sample size        : 100000000
 time taken         : 768.941767ms
 time per sample    : 7.689418ns
 samples per second : 130048859.733744
testing copy and reset z
 sample size        : 100000000
 time taken         : 880.893920ms
 time per sample    : 8.808939ns
 samples per second : 113521046.892911
testing hand_roll
 sample size        : 100000000
 time taken         : 769.276240ms
 time per sample    : 7.692762ns
 samples per second : 129992315.894223
testing assign through custom iterator
 sample size        : 100000000
 time taken         : 689.493098ms
 time per sample    : 6.894931ns
 samples per second : 145034084.155546

最终答案

通过自定义转换迭代器进行分配。

送给您工具箱的一份礼物

// Replaces the contents of `target` with func(e) for every element e in
// [first, last), using a single call to target.assign().
//
// Container          must provide assign(iterator, iterator).
// Iter               must be a class-type iterator (the adaptor derives from
//                    it) exposing a nested value_type.
// TransformFunction  is called with a const reference to each source element;
//                    its result is converted to Iter::value_type.
template<class Container, class Iter, class TransformFunction>
void assign_transform(Container& target, Iter first, Iter last, TransformFunction func)
{
    // Thin adaptor: inherits all traversal from Iter and only overrides
    // dereference so that it yields the transformed value.
    struct mapping_iterator : Iter
    {
        using value_type = typename Iter::value_type;

        // Non-owning pointer to the caller's function object, so copies of
        // the iterator stay cheap.
        TransformFunction* op;

        mapping_iterator(Iter position, TransformFunction& fn)
        : Iter(position), op(std::addressof(fn))
        {}

        value_type operator*() const {
            const auto& element = Iter::operator*();
            return (*op)(element);
        }
    };

    mapping_iterator range_begin(first, func);
    mapping_iterator range_end(last, func);
    target.assign(range_begin, range_end);
}

像这样使用：

         assign_transform(target, source.begin(), source.end(),
                          [](auto& from)
         {
             return PLY(from.x, from.y, 0.0);
         });

Answer 3

如果存在更快的方法，您的编译器很可能会自行找到它。请尽可能简单明了地编写代码。这样编译器才最有机会优化这次复制和循环——前提是这种优化在您的平台上有意义。

Answer 4

这听起来正适合用 std::transform 来完成，配合一个小的 lambda 对每个元素进行转换。

Answer 5

带有默认分配器的向量存在两个问题：

如果把向量 resize 到更大的大小，每个新增元素都会被初始化，而初始化是有成本的；
如果先为向量 reserve 内存、再逐个插入元素，由于每次插入都要更新向量的大小，每次插入同样有成本。

下面这个问题讨论了如何避免这种开销：

Is this behavior of vector::resize(size_type n) under C++11 and Boost.Container correct?

我们可以使用一个不做任何初始化的自定义分配器；以所需大小创建向量之后，再用 memcpy 或 for 循环来复制数据：

#include <cstring>
#include <memory>
#include <new>
#include <utility>
#include <vector>
/// Allocator that default-initializes elements instead of value-initializing
/// them, so `std::vector<T, no_init_alloc<T>> v(n)` does not zero-fill
/// trivially constructible `T`.
///
/// Allocation and deallocation are inherited unchanged from std::allocator<T>.
template <class T>
class no_init_alloc
    : public std::allocator<T>
{
public:
    using std::allocator<T>::allocator;

    /// Rebinding must yield this adaptor again. Without it, the `rebind`
    /// inherited from std::allocator (present up to C++17) maps to plain
    /// std::allocator<U>, and a container may never call our construct().
    template <class U>
    struct rebind { using other = no_init_alloc<U>; };

    /// No-argument construction: default-initialize. This is a no-op for
    /// trivially default-constructible types and still runs the default
    /// constructor for class types that have one.
    template <class U>
    void construct(U* ptr) {
        ::new (static_cast<void*>(ptr)) U;
    }

    /// Construction with arguments (copy, move, emplace): build the object
    /// normally, so push_back and container copies yield real elements.
    template <class U, class Arg0, class... Args>
    void construct(U* ptr, Arg0&& arg0, Args&&... args) {
        ::new (static_cast<void*>(ptr))
            U(std::forward<Arg0>(arg0), std::forward<Args>(args)...);
    }
};
// Plain aggregate holding the three float coordinates of one point.
struct PLY
{
    float x;
    float y;
    float z;
};
int main()
{
    std::vector<PLY> source(1000000);
    //create a vector with the custom allocator refusing any initalization
    std::vector<PLY, no_init_alloc<PLY>> target(source.size());
    //then we can use memcpy approach
    {
        memcpy(target.data(), source.data(), source.size() * sizeof(source.front()));
        for(auto& t : target) t.z = 0.0f;
    }
    // or simple for loop approach
    {
         size_t sz = target.size();
         for(size_t i = 0; i < sz; ++i) {
            target[i].x = source[i].x;
            target[i].y = source[i].y;
            target[i].z = 0.0f;
         }
    }
    //convert vector<PLY, no_init_alloc<PLY>> to vector<PLY>
    std::vector<PLY> result {std::move(*reinterpret_cast<std::vector<PLY>*>(&target))};
}

使用 @Richard Hodges 的基准测试代码、在 -O2 优化下得到的结果是：

CLANG：

testing transform
 sample size        : 1000000
 time taken         : 8.363995ms
 time per sample    : 8.363995ns
 samples per second : 119560090.602637
testing assign through custom iterator
 sample size        : 1000000
 time taken         : 7.162974ms
 time per sample    : 7.162974ns
 samples per second : 139606816.945029
testing no_init_alloc_memcpy
 sample size        : 1000000
 time taken         : 6.918533ms
 time per sample    : 6.918533ns
 samples per second : 144539312.018892
testing no_init_alloc_for
 sample size        : 1000000
 time taken         : 6.383721ms
 time per sample    : 6.383721ns
 samples per second : 156648450.018414

GCC

testing transform
 sample size        : 1000000
 time taken         : 12.083038ms
 time per sample    : 12.083038ns
 samples per second : 82760643.473934
testing assign through custom iterator
 sample size        : 1000000
 time taken         : 6.188324ms
 time per sample    : 6.188324ns
 samples per second : 161594641.780230
testing no_init_alloc_memcpy
 sample size        : 1000000
 time taken         : 3.000699ms
 time per sample    : 3.000699ns
 samples per second : 333255684.758785
testing no_init_alloc_for
 sample size        : 1000000
 time taken         : 1.979482ms
 time per sample    : 1.979482ns
 samples per second : 505182669.001284

最终答案：

使用带有简单for循环的自定义非初始化分配器

使用C ++中的特定更改复制向量的最快方法

5 个答案:

第一个回答

样本结果

最终答案

您工具箱的礼物