下面是一段 Python 代码,它可以在非常短的时间内生成对数等间距(logspace)的数值:
import numpy
# Build 10,000,000 samples with numpy.logspace over the exponent range 0..1
# and print the resulting array.
print(numpy.logspace(0,1,num=10000000))
我尝试在 C++ 中模拟它的输出,如下所示:
#include <iostream>
#include <cmath>
#include <vector>
/// Returns `k` samples 10^a ... 10^b whose exponents are evenly spaced
/// (both endpoints included), mirroring numpy.logspace with base 10.
///
/// @param a exponent of the first sample
/// @param b exponent of the last sample
/// @param k number of samples; k <= 0 yields an empty vector
/// @return the samples in increasing-exponent order
std::vector<double> logspace (const double &a, const double &b, const int &k)
{
    std::vector<double> values;
    if (k <= 0)
    {
        return values;
    }
    // One allocation up front instead of repeated growth during push_back.
    values.reserve(static_cast<std::vector<double>::size_type>(k));
    // A single sample has no interval to divide; without this guard the
    // step would be 0/0 and the only element would come out as NaN.
    const double step = (k > 1) ? (b - a) / (k - 1) : 0.0;
    for (int i = 0; i < k; i++)
    {
        // The exponent starts at `a`, not at 0.
        values.push_back(std::pow(10.0, a + i * step));
    }
    return values;
}
/// Writes every element of `logspace` to std::cout, one value per line,
/// followed by one extra empty line.
///
/// The vector is taken by const reference so that printing does not copy
/// the whole container.
///
/// @param logspace the values to print
void logspace_print (const std::vector<double> &logspace)
{
    for (const double value : logspace)
    {
        std::cout << value << "\n";
    }
    std::cout << "\n";
}
/// Entry point: computes logspace(0, 1, 10000000) and prints the result.
int main ()
{
    const int sample_count = 10000000;
    std::vector<double> samples = logspace(0, 1, sample_count);
    logspace_print(samples);
    return 0;
}
使用函数 pow(., .) 和 for 循环(也许还有许多其他原因)使我的代码在浮点运算方面显得很幼稚,以至于它的运行时间与 Python 版本相比相形见绌。我还看了“Is there something like numpy.logspace in C++?”中的一些建议,但是没有明显的差异。那么,如何修改我的代码,或者编写一份与 Python 版本性能相当的新代码?
答案 0 :(得分:3)
有趣的问题!我的答案在代码顶部给出了该函数的几个不同版本,其余部分只是基准测试代码,使用的库是 google-benchmark。
- 你的代码避免了 std::endl 带来的刷新,这很好!此外,printf 可能比 std::cout 更快。还可以看看 fmtlib [2],它快速且易于使用。
- (参见 logspace_v3。)numpy 的实现是先运行 linspace,然后再对结果求以 10 为底的幂。
- (使用 -march=native -mtune=native 和 fast-math 等编译选项时,)矢量化应该会生效,但我并不确信。这是一个带有矢量化的 Godbolt 示例(第 590 行)[3]。
- (参见 logspace_v6。)还可以去掉循环中对 pow 的调用。请注意,这会累积浮点误差并导致结果不准确。
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

#include <benchmark/benchmark.h>
#include <gtest/gtest.h>
/// Reference implementation: returns `k` samples 10^a ... 10^b whose
/// exponents are evenly spaced, both endpoints included.
///
/// No reserve() is done here; logspace_v1 is the variant that adds it, so
/// this one presumably serves as the unoptimized reference.
///
/// @param a exponent of the first sample
/// @param b exponent of the last sample
/// @param k number of samples; k <= 0 yields an empty vector
std::vector<double> logspace(double a, double b, int k) {
  std::vector<double> values;
  for (int i = 0; i < k; i++) {
    // With a single sample the offset would be 0/0 (NaN), so use 0 instead.
    const double offset = (k > 1) ? i * (b - a) / (k - 1) : 0.0;
    // The exponent starts at `a`, not at 0.
    values.push_back(pow(10, a + offset));
  }
  return values;
}
/// Pre-allocates the correct size using .reserve(), then fills the vector
/// with `k` samples 10^a ... 10^b whose exponents are evenly spaced.
///
/// @param a exponent of the first sample
/// @param b exponent of the last sample
/// @param k number of samples
std::vector<double> logspace_v1(double a, double b, int k) {
  std::vector<double> values;
  values.reserve(k);
  for (int i = 0; i < k; i++) {
    // With a single sample the offset would be 0/0 (NaN), so use 0 instead.
    const double offset = (k > 1) ? i * (b - a) / (k - 1) : 0.0;
    // The exponent starts at `a`, not at 0.
    values.push_back(pow(10, a + offset));
  }
  return values;
}
/// Manually extracts the constant exponent step out of the loop, then fills
/// a pre-reserved vector with `k` samples 10^a ... 10^b.
///
/// @param a exponent of the first sample
/// @param b exponent of the last sample
/// @param k number of samples
std::vector<double> logspace_v2(double a, double b, int k) {
  std::vector<double> values;
  values.reserve(k);
  // A single sample has no interval to divide; avoid the 0/0 step.
  const double exp_scale = (k > 1) ? (b - a) / (k - 1) : 0.0;
  for (int i = 0; i < k; i++) {
    // The exponent starts at `a`, not at 0.
    values.push_back(pow(10, a + i * exp_scale));
  }
  return values;
}
/// Copies the two-pass shape of numpy's implementation: first build the
/// evenly spaced exponents (linspace), then raise 10 to each of them.
///
/// @param a exponent of the first sample
/// @param b exponent of the last sample
/// @param k number of samples
std::vector<double> logspace_v3(double a, double b, int k) {
  /*
  y = linspace(start, stop, num=num, endpoint=endpoint, axis=axis)
  if dtype is None:
  return _nx.power(base, y)
  return _nx.power(base, y).astype(dtype, copy=False)
  */
  // A single sample has no interval to divide; avoid the 0/0 step.
  const double exp_scale = (k > 1) ? (b - a) / (k - 1) : 0.0;
  std::vector<double> values;
  values.reserve(k);
  // Pass 1: the exponents, starting at `a` rather than at 0.
  for (int i = 0; i < k; i++) {
    values.push_back(a + i * exp_scale);
  }
  // Pass 2: replace every exponent x by 10^x in place.
  std::for_each(values.begin(), values.end(),
                [](double &x) { x = pow(10, x); });
  return values;
}
/// Improves on v3 by applying pow directly while generating, so the vector
/// is written in a single std::generate pass.
///
/// @param a exponent of the first sample
/// @param b exponent of the last sample
/// @param k number of samples
std::vector<double> logspace_v4(double a, double b, int k) {
  // A single sample has no interval to divide; avoid the 0/0 step.
  const double exp_scale = (k > 1) ? (b - a) / (k - 1) : 0.0;
  std::vector<double> values(k, 0.);
  // `n` starts at -1 because it is incremented before its first use.
  std::generate(values.begin(), values.end(),
                [n = -1, a, exp_scale]() mutable {
                  n++;
                  // The exponent starts at `a`, not at 0.
                  return pow(10, a + n * exp_scale);
                });
  return values;
}
/// Uses std::iota plus two std::for_each passes: first linspace, then power.
///
/// @param a exponent of the first sample
/// @param b exponent of the last sample
/// @param k number of samples
std::vector<double> logspace_v5(double a, double b, int k) {
  // A single sample has no interval to divide; avoid the 0/0 step.
  const double exp_scale = (k > 1) ? (b - a) / (k - 1) : 0.0;
  std::vector<double> values(k, 0.);
  // Fill with the sample indices 0, 1, 2, ...
  std::iota(values.begin(), values.end(), 0);
  // Turn each index into its exponent, starting at `a` rather than at 0.
  std::for_each(values.begin(), values.end(),
                [a, exp_scale](double &x) { x = a + x * exp_scale; });
  // Replace every exponent x by 10^x in place.
  std::for_each(values.begin(), values.end(),
                [](double &x) { x = pow(10, x); });
  return values;
}
/// Builds the samples by repeated multiplication instead of one pow call
/// per element.
///
/// Since 10^(a + i*step) == 10^a * (10^step)^i, the first sample is 10^a
/// and every following sample is the previous one times 10^step. Only two
/// pow calls are made in total; each multiplication rounds, so the error
/// grows along the sequence.
///
/// @param a exponent of the first sample
/// @param b exponent of the last sample
/// @param k number of samples
std::vector<double> logspace_v6(double a, double b, int k) {
  const double step = (b - a) / (k - 1);
  const double ratio = pow(10, step);

  std::vector<double> result;
  result.reserve(k);

  double current = pow(10, a);
  for (int remaining = k; remaining > 0; --remaining) {
    result.push_back(current);
    current *= ratio;
  }
  return result;
}
/// Benchmark body shared by all logspace variants.
///
/// The implementation under test is the template argument F. Each benchmark
/// iteration calls F(0, 1, state.range(0)) and hands the returned vector to
/// benchmark::DoNotOptimize.
/// NOTE(review): state.range(0) is presumably the value passed via ->Arg()
/// at registration, i.e. the sample count.
template <std::vector<double> (*F)(double, double, int)>
static void LogspaceBench(benchmark::State &state) {
for (auto _ : state) {
benchmark::DoNotOptimize(F(0, 1, state.range(0)));
}
}
// Register LogspaceBench for every variant with Arg(1000); logspace_v3 and
// logspace_v6 are additionally registered with Arg(10000000).
BENCHMARK_TEMPLATE(LogspaceBench, logspace)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v1)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v2)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v3)->Arg(1000)->Arg(10000000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v4)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v5)->Arg(1000);
BENCHMARK_TEMPLATE(LogspaceBench, logspace_v6)->Arg(1000)->Arg(10000000);
/// Callable type shared by every logspace implementation under test.
using LogspaceFn = std::function<std::vector<double>(double, double, int)>;

/// Value-parameterized fixture whose parameter is the implementation to test.
class LogspaceTest : public testing::TestWithParam<LogspaceFn> {};
/// Checks that the parameterized implementation returns the same number of
/// samples, and element-wise ASSERT_DOUBLE_EQ-equal values, as the reference
/// logspace() for the arguments (0, 1, 1000).
TEST_P(LogspaceTest, IsSame) {
  const auto func = GetParam();
  const auto actual = func(0, 1., 1000);
  const auto expected = logspace(0., 1., 1000);
  // TODO: Buggy with (3, 70, 1000) and (0, 1, 1000)
  // NOTE(review): logspace_v6 multiplies cumulatively, so its rounding
  // differs from the pow-per-element versions; presumably that is what
  // trips the comparison. Confirm before tightening or loosening it.
  ASSERT_EQ(expected.size(), actual.size());
  // Index with std::size_t so it is not compared as signed vs. unsigned.
  for (std::size_t i = 0; i < expected.size(); i++) {
    ASSERT_DOUBLE_EQ(actual[i], expected[i]) << i;
  }
}
// Instantiate the LogspaceTest suite once per implementation, including the
// reference logspace itself.
INSTANTIATE_TEST_SUITE_P(InstantiationName, LogspaceTest,
testing::Values(logspace, logspace_v1, logspace_v2,
logspace_v3, logspace_v4, logspace_v5,
logspace_v6));
/// Entry point: runs the registered benchmarks first, then the GoogleTest
/// suites, and returns the test runner's exit status.
int main(int argc, char **argv) {
  // Benchmark phase.
  ::benchmark::Initialize(&argc, argv);
  ::benchmark::RunSpecifiedBenchmarks();

  // Test phase.
  ::testing::InitGoogleTest(&argc, argv);
  const int test_status = RUN_ALL_TESTS();
  return test_status;
}
答案 1 :(得分:1)
可以轻松地对显示的代码进行至少三个明显的优化。
1)以 C++17 模式进行编译,以确保从 logspace 返回时进行复制省略(copy elision)。
2)
std::vector<double> logspace;
for (int i = 0; i < k; i++)
使用 logspace.reserve() 预先为向量分配空间,以避免在填充向量的过程中进行无用的重复分配。
3)
void logspace_print (std::vector<double> logspace)
在此处按值传递会创建整个向量的一份副本,而这毫无用处。请修改此函数,使其通过引用接收 logspace 参数。
还有一个可能的微优化,它也许有效果,也许没有任何作用:
logspace.push_back(pow(10, i * (b - a) / (k - 1)));
该公式的“(b-a)/(k-1)”部分是常量,可以提到循环之外。不过,我预计编译器会自己完成这件事,因为这是一项相当基本的优化。