我已经完成了数学库的第一个版本,并且为了下一步,我想转向表达式模板来提高代码的性能。但是,我的初步结果与我的预期不同。我正在MSVC 2010中编译,在vanilla Release模式下(并且可以使用C ++ 0x)。
为我将向您展示的大量代码提前道歉,在让人们看到我正在做的事情的同时,我尽可能地做到这一点。分析框架:
#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <iterator>
#include <limits>
#include <type_traits>
#include <vector>
namespace math
{
class vector; // to be determined
std::ostream& operator<<(std::ostream& stream, const vector& vec)
{
for (std::size_t i = 0; i < 4; ++i)
stream << vec[i] << " ";
return stream;
}
}
// test framework
typedef std::vector<math::vector> array_type[3];
typedef std::vector<math::vector> vector_type;
float generate_float()
{
return static_cast<float>(rand());
}
math::vector generate_vector()
{
return math::vector(generate_float(), generate_float(),
generate_float(), generate_float());
}
vector_type generate_source(std::size_t count)
{
vector_type result; result.reserve(count);
std::generate_n(std::back_inserter(result), count, generate_vector);
return result;
}
double test(const array_type& source,
vector_type& results, std::size_t iterations)
{
// time
std::clock_t begin = std::clock();
for (std::size_t i = 0; i < iterations; ++i)
{
const math::vector& v0 = source[0][i];
const math::vector& v1 = source[1][i];
const math::vector& v2 = source[2][i];
math::vector result(v0 + v1 + v2);
results.push_back(result);
}
std::clock_t end = std::clock();
// print time
double elapsed = static_cast<double>(end - begin) / CLOCKS_PER_SEC;
std::cout << "time: " << elapsed << "\n";
return elapsed;
}
int main()
{
// prepare tests
const std::size_t time_count = 50; // number of times to get time count
const std::size_t test_count = 10000000; // number of iterations in a test
std::cout << "allocating..." << std::endl;
std::vector<double> timeResults; timeResults.reserve(time_count);
array_type source;
for (std::size_t i = 0; i < 3; ++i)
source[i] = generate_source(test_count);
vector_type results;
results.reserve(test_count);
// pre tests
std::cout << "pre-testing..." << std::endl;
for (std::size_t i = 0; i < time_count / 10; ++i)
{
timeResults.push_back(test(source, results, test_count));
results.clear();
}
timeResults.clear();
// begin tests
std::cout << "testing..." << std::endl;
for (std::size_t i = 0; i < time_count; ++i)
{
timeResults.push_back(test(source, results, test_count));
results.clear();
}
// can be turned into functors for non-C++0x, for testing in C++03
double min = std::numeric_limits<double>::max();
double max = std::numeric_limits<double>::min();
std::for_each(timeResults.begin(), timeResults.end(),
[&min, &max](double x)
{
min = std::min(x, min);
max = std::max(x, max);
});
double sum = 0; // throws out max and min results
bool minFlag = false, maxFlag = false;
std::for_each(timeResults.begin(), timeResults.end(),
[min, max, &sum, &minFlag, &maxFlag](double x)
{
if (!minFlag && x <= min)
minFlag = true; // skip
else if (!maxFlag && x >= max)
maxFlag = true; // skip
else
sum += x; // add
});
// print results
double average = sum / (timeResults.size() - 2);
std::cout << "\ntotal time: " << sum << " average time: " << average
<< "\n" << "min: " << min << " max: " << max << std::endl;
}
表达式模板载体:
namespace math
{
// core expression template
template <typename E>
class vector_expression
{
public:
template <typename std::size_t I>
float get() const
{
return static_cast<const E&>(*this).get<I>();
}
protected:
~vector_expression() {} // not a public base
};
// vector class
class vector : public vector_expression<vector>
{
public:
vector()
{
data[0] = data[1] = data[2] = data[3] = 0;
}
vector(float x, float y, float z, float w)
{
data[0] = x; data[1] = y; data[2] = z; data[3] = w;
}
template <typename E>
vector(const vector_expression<E>& e)
{
evaluate<0>(e);
}
template <std::size_t I>
float get() const
{
return data[I];
}
float operator[](std::size_t index) const
{
return data[index];
}
private:
template <std::size_t I, typename E>
void evaluate(const vector_expression<E>& e,
typename std::enable_if<I < 4>::type* = nullptr)
{
data[I] = e.get<I>();
evaluate<I + 1>(e);
}
template <std::size_t I, typename E>
void evaluate(const vector_expression<E>& e,
typename std::enable_if<I >= 4>::type* = nullptr)
{
// done
}
float data[4];
};
template <typename E1, typename E2>
class vector_expression_sum :
public vector_expression<vector_expression_sum<E1, E2>>
{
public:
vector_expression_sum(const vector_expression<E1>& first,
const vector_expression<E2>& second) :
mFirst(first),
mSecond(second)
{}
template <typename std::size_t I>
float get() const
{
return mFirst.get<I>() + mSecond.get<I>();
}
private:
const vector_expression<E1>& mFirst;
const vector_expression<E2>& mSecond;
};
template <typename E1, typename E2>
vector_expression_sum<E1, E2>
operator+(const vector_expression<E1>& first,
const vector_expression<E2>& second)
{
return vector_expression_sum<E1, E2>(first, second);
}
}
手动内联:
namespace math
{
// same definition
}
// ...
double test(const array_type& source,
vector_type& results, std::size_t iterations)
{
// ...
{
// ...
math::vector result(v0.get<0>() + v1.get<0>() + v2.get<0>(),
v0.get<1>() + v1.get<1>() + v2.get<1>(),
v0.get<2>() + v1.get<2>() + v2.get<2>(),
v0.get<3>() + v1.get<3>() + v2.get<3>());
// ...
}
// ...
}
// ...
结果:
表达式模板:
总时间:14.172平均时间:0.29525
min:0.281 max:0.422手动内联:
总时间:8.438平均时间:0.175792
min:0.171 max:0.188
正如您所看到的,表达式模板(显然)并没有变成完全内联的代码。以下是test()
的反汇编,最后一次调用std::clock()
:
表达式模板程序集:
test:
00401110 push ebp
00401111 mov ebp,esp
00401113 sub esp,38h
00401116 mov eax,dword ptr [___security_cookie (404018h)]
0040111B xor eax,ebp
0040111D mov dword ptr [ebp-4],eax
00401120 push ebx
00401121 push esi
00401122 mov esi,ecx
00401124 mov dword ptr [ebp-28h],esi
00401127 call dword ptr [__imp__clock (4030DCh)]
0040112D xor ebx,ebx
0040112F mov dword ptr [ebp-1Ch],eax
00401132 mov dword ptr [ebp-24h],ebx
00401135 jmp test+2Ah (40113Ah)
00401137 mov esi,dword ptr [ebp-28h]
0040113A mov eax,dword ptr [esi+20h]
0040113D mov edx,dword ptr [esi+10h]
00401140 mov ecx,dword ptr [esi]
00401142 add eax,ebx
00401144 mov dword ptr [ebp-18h],eax
00401147 add edx,ebx
00401149 add ecx,ebx
0040114B lea eax,[ebp-30h]
0040114E call math::operator+<math::vector,math::vector> (401E60h)
00401153 mov edx,dword ptr [ebp-18h]
00401156 mov ecx,eax
00401158 lea eax,[ebp-38h]
0040115B call math::operator+<math::vector,math::vector> (401E60h)
00401160 mov ecx,dword ptr [eax]
00401162 mov edx,dword ptr [ecx+4]
00401165 fld dword ptr [edx]
00401167 mov edx,dword ptr [ecx]
00401169 fadd dword ptr [edx]
0040116B mov eax,dword ptr [eax+4]
0040116E mov edx,dword ptr [ecx+4]
00401171 fstp dword ptr [ebp-18h]
00401174 fld dword ptr [ebp-18h]
00401177 fadd dword ptr [eax]
00401179 fstp dword ptr [ebp-14h]
0040117C fld dword ptr [edx+4]
0040117F mov edx,dword ptr [ecx]
00401181 fadd dword ptr [edx+4]
00401184 mov edx,dword ptr [ecx+4]
00401187 fstp dword ptr [ebp-18h]
0040118A fld dword ptr [ebp-18h]
0040118D fadd dword ptr [eax+4]
00401190 fstp dword ptr [ebp-10h]
00401193 fld dword ptr [edx+8]
00401196 mov edx,dword ptr [ecx]
00401198 fadd dword ptr [edx+8]
0040119B mov edx,dword ptr [ecx+4]
0040119E mov ecx,dword ptr [ecx]
004011A0 fstp dword ptr [ebp-18h]
004011A3 fld dword ptr [ebp-18h]
004011A6 fadd dword ptr [eax+8]
004011A9 fstp dword ptr [ebp-0Ch]
004011AC fld dword ptr [edx+0Ch]
004011AF lea edx,[ebp-14h]
004011B2 fadd dword ptr [ecx+0Ch]
004011B5 fstp dword ptr [ebp-18h]
004011B8 fld dword ptr [ebp-18h]
004011BB fadd dword ptr [eax+0Ch]
004011BE mov eax,dword ptr [edi+4]
004011C1 fstp dword ptr [ebp-8]
004011C4 cmp edx,eax
004011C6 jae test+12Ch (40123Ch)
004011C8 mov edx,dword ptr [edi]
004011CA lea ecx,[ebp-14h]
004011CD cmp edx,ecx
004011CF ja test+12Ch (40123Ch)
004011D1 mov esi,ecx
004011D3 mov ecx,dword ptr [edi+8]
004011D6 sub esi,edx
004011D8 cmp eax,ecx
004011DA jne test+10Bh (40121Bh)
004011DC sub eax,edx
004011DE sar eax,4
004011E1 cmp eax,0FFFFFFEh
004011E6 ja test+201h (401311h)
004011EC sub ecx,edx
004011EE inc eax
004011EF sar ecx,4
004011F2 cmp eax,ecx
004011F4 jbe test+10Bh (40121Bh)
004011F6 mov edx,ecx
004011F8 shr edx,1
004011FA mov ebx,0FFFFFFFh
004011FF sub ebx,edx
00401201 cmp ebx,ecx
00401203 jae test+0F9h (401209h)
00401205 xor ecx,ecx
00401207 jmp test+0FBh (40120Bh)
00401209 add ecx,edx
0040120B cmp ecx,eax
0040120D jae test+101h (401211h)
0040120F mov ecx,eax
00401211 mov edx,edi
00401213 call std::vector<math::vector,std::allocator<math::vector> >::reserve (401930h)
00401218 mov ebx,dword ptr [ebp-24h]
0040121B mov eax,dword ptr [edi+4]
0040121E and esi,0FFFFFFF0h
00401221 add esi,dword ptr [edi]
00401223 test eax,eax
00401225 je test+18Fh (40129Fh)
00401227 mov edx,dword ptr [esi]
00401229 mov dword ptr [eax],edx
0040122B mov ecx,dword ptr [esi+4]
0040122E mov dword ptr [eax+4],ecx
00401231 mov edx,dword ptr [esi+8]
00401234 mov dword ptr [eax+8],edx
00401237 mov ecx,dword ptr [esi+0Ch]
0040123A jmp test+18Ch (40129Ch)
0040123C mov ecx,dword ptr [edi+8]
0040123F cmp eax,ecx
00401241 jne test+171h (401281h)
00401243 mov edx,dword ptr [edi]
00401245 sub eax,edx
00401247 sar eax,4
0040124A cmp eax,0FFFFFFEh
0040124F ja test+201h (401311h)
00401255 sub ecx,edx
00401257 inc eax
00401258 sar ecx,4
0040125B cmp eax,ecx
0040125D jbe test+171h (401281h)
0040125F mov edx,ecx
00401261 shr edx,1
00401263 mov esi,0FFFFFFFh
00401268 sub esi,edx
0040126A cmp esi,ecx
0040126C jae test+162h (401272h)
0040126E xor ecx,ecx
00401270 jmp test+164h (401274h)
00401272 add ecx,edx
00401274 cmp ecx,eax
00401276 jae test+16Ah (40127Ah)
00401278 mov ecx,eax
0040127A mov edx,edi
0040127C call std::vector<math::vector,std::allocator<math::vector> >::reserve (401930h)
00401281 mov eax,dword ptr [edi+4]
00401284 test eax,eax
00401286 je test+18Fh (40129Fh)
00401288 mov edx,dword ptr [ebp-14h]
0040128B mov ecx,dword ptr [ebp-10h]
0040128E mov dword ptr [eax],edx
00401290 mov edx,dword ptr [ebp-0Ch]
00401293 mov dword ptr [eax+4],ecx
00401296 mov ecx,dword ptr [ebp-8]
00401299 mov dword ptr [eax+8],edx
0040129C mov dword ptr [eax+0Ch],ecx
0040129F add dword ptr [edi+4],10h
004012A3 add ebx,10h
004012A6 mov dword ptr [ebp-24h],ebx
004012A9 cmp ebx,9896800h
004012AF jb test+27h (401137h)
004012B5 call dword ptr [__imp__clock (4030DCh)]
手动内联汇编:
test:
004010B0 push ebp
004010B1 mov ebp,esp
004010B3 sub esp,28h
004010B6 mov eax,dword ptr [___security_cookie (404018h)]
004010BB xor eax,ebp
004010BD mov dword ptr [ebp-4],eax
004010C0 push ebx
004010C1 push esi
004010C2 mov esi,ecx
004010C4 mov dword ptr [ebp-24h],esi
004010C7 call dword ptr [__imp__clock (4030DCh)]
004010CD xor ebx,ebx
004010CF mov dword ptr [ebp-1Ch],eax
004010D2 mov dword ptr [ebp-18h],ebx
004010D5 mov eax,dword ptr [esi]
004010D7 mov ecx,dword ptr [esi+10h]
004010DA fld dword ptr [eax+ebx]
004010DD fadd dword ptr [ecx+ebx]
004010E0 mov edx,dword ptr [esi+20h]
004010E3 add eax,ebx
004010E5 add ecx,ebx
004010E7 fadd dword ptr [edx+ebx]
004010EA add edx,ebx
004010EC fstp dword ptr [ebp-14h]
004010EF fld dword ptr [ecx+4]
004010F2 fadd dword ptr [eax+4]
004010F5 fadd dword ptr [edx+4]
004010F8 fstp dword ptr [ebp-10h]
004010FB fld dword ptr [ecx+8]
004010FE fadd dword ptr [eax+8]
00401101 fadd dword ptr [edx+8]
00401104 fstp dword ptr [ebp-0Ch]
00401107 fld dword ptr [ecx+0Ch]
0040110A lea ecx,[ebp-14h]
0040110D fadd dword ptr [eax+0Ch]
00401110 mov eax,dword ptr [edi+4]
00401113 fadd dword ptr [edx+0Ch]
00401116 fstp dword ptr [ebp-8]
00401119 cmp ecx,eax
0040111B jae test+0E4h (401194h)
0040111D mov edx,dword ptr [edi]
0040111F cmp edx,ecx
00401121 ja test+0E4h (401194h)
00401123 mov esi,ecx
00401125 mov ecx,dword ptr [edi+8]
00401128 sub esi,edx
0040112A cmp eax,ecx
0040112C jne test+0BDh (40116Dh)
0040112E sub eax,edx
00401130 sar eax,4
00401133 cmp eax,0FFFFFFEh
00401138 ja test+1BCh (40126Ch)
0040113E sub ecx,edx
00401140 inc eax
00401141 sar ecx,4
00401144 cmp eax,ecx
00401146 jbe test+0BDh (40116Dh)
00401148 mov edx,ecx
0040114A shr edx,1
0040114C mov ebx,0FFFFFFFh
00401151 sub ebx,edx
00401153 cmp ebx,ecx
00401155 jae test+0ABh (40115Bh)
00401157 xor ecx,ecx
00401159 jmp test+0ADh (40115Dh)
0040115B add ecx,edx
0040115D cmp ecx,eax
0040115F jae test+0B3h (401163h)
00401161 mov ecx,eax
00401163 mov edx,edi
00401165 call std::vector<math::vector,std::allocator<math::vector> >::reserve (401890h)
0040116A mov ebx,dword ptr [ebp-18h]
0040116D mov eax,dword ptr [edi+4]
00401170 and esi,0FFFFFFF0h
00401173 add esi,dword ptr [edi]
00401175 test eax,eax
00401177 je test+0DFh (40118Fh)
00401179 mov edx,dword ptr [esi]
0040117B mov dword ptr [eax],edx
0040117D mov ecx,dword ptr [esi+4]
00401180 mov dword ptr [eax+4],ecx
00401183 mov edx,dword ptr [esi+8]
00401186 mov dword ptr [eax+8],edx
00401189 mov ecx,dword ptr [esi+0Ch]
0040118C mov dword ptr [eax+0Ch],ecx
0040118F mov esi,dword ptr [ebp-24h]
00401192 jmp test+14Ah (4011FAh)
00401194 mov ecx,dword ptr [edi+8]
00401197 cmp eax,ecx
00401199 jne test+12Ch (4011DCh)
0040119B mov edx,dword ptr [edi]
0040119D sub eax,edx
0040119F sar eax,4
004011A2 cmp eax,0FFFFFFEh
004011A7 ja test+1BCh (40126Ch)
004011AD sub ecx,edx
004011AF inc eax
004011B0 sar ecx,4
004011B3 cmp eax,ecx
004011B5 jbe test+12Ch (4011DCh)
004011B7 mov edx,ecx
004011B9 shr edx,1
004011BB mov esi,0FFFFFFFh
004011C0 sub esi,edx
004011C2 cmp esi,ecx
004011C4 jae test+11Ah (4011CAh)
004011C6 xor ecx,ecx
004011C8 jmp test+11Ch (4011CCh)
004011CA add ecx,edx
004011CC cmp ecx,eax
004011CE jae test+122h (4011D2h)
004011D0 mov ecx,eax
004011D2 mov edx,edi
004011D4 call std::vector<math::vector,std::allocator<math::vector> >::reserve (401890h)
004011D9 mov esi,dword ptr [ebp-24h]
004011DC mov eax,dword ptr [edi+4]
004011DF test eax,eax
004011E1 je test+14Ah (4011FAh)
004011E3 mov edx,dword ptr [ebp-14h]
004011E6 mov ecx,dword ptr [ebp-10h]
004011E9 mov dword ptr [eax],edx
004011EB mov edx,dword ptr [ebp-0Ch]
004011EE mov dword ptr [eax+4],ecx
004011F1 mov ecx,dword ptr [ebp-8]
004011F4 mov dword ptr [eax+8],edx
004011F7 mov dword ptr [eax+0Ch],ecx
004011FA add dword ptr [edi+4],10h
004011FE add ebx,10h
00401201 mov dword ptr [ebp-18h],ebx
00401204 cmp ebx,9896800h
0040120A jb test+25h (4010D5h)
00401210 call dword ptr [__imp__clock (4030DCh)]
结论:无论出于何种原因,MSVC2010都不会内联operator+
的调用。有谁知道这是为什么?即使把__forceinline
(我想避免)也没有内联。
更新:作为jdv-Jan de Vaan mentioned,当我删除析构函数时:
// ~vector_expression() {} // not a public base
它内联operator+
。奇怪的是它将它内联到不同的程序集,我的测试表明这个输出虽然比我原来的表现更好,但仍然没有达到与手动内联版本相同的状态。任何想法为什么会这样?
00A710B0 push ebp
00A710B1 mov ebp,esp
00A710B3 sub esp,28h
00A710B6 mov eax,dword ptr [___security_cookie (0A74018h)]
00A710BB xor eax,ebp
00A710BD mov dword ptr [ebp-4],eax
00A710C0 push ebx
00A710C1 push esi
00A710C2 mov esi,ecx
00A710C4 mov dword ptr [ebp-24h],esi
00A710C7 call dword ptr [__imp__clock (0A730DCh)]
00A710CD xor ebx,ebx
00A710CF mov dword ptr [ebp-1Ch],eax
00A710D2 mov dword ptr [ebp-28h],ebx
00A710D5 mov eax,dword ptr [esi]
00A710D7 mov ecx,dword ptr [esi+10h]
00A710DA fld dword ptr [eax+ebx]
00A710DD fadd dword ptr [ecx+ebx]
00A710E0 mov edx,dword ptr [esi+20h]
00A710E3 add eax,ebx
00A710E5 add ecx,ebx
00A710E7 fstp dword ptr [ebp-18h]
00A710EA add edx,ebx
00A710EC fld dword ptr [ebp-18h]
00A710EF fadd dword ptr [edx]
00A710F1 fstp dword ptr [ebp-14h]
00A710F4 fld dword ptr [eax+4]
00A710F7 fadd dword ptr [ecx+4]
00A710FA fstp dword ptr [ebp-18h]
00A710FD fld dword ptr [ebp-18h]
00A71100 fadd dword ptr [edx+4]
00A71103 fstp dword ptr [ebp-10h]
00A71106 fld dword ptr [eax+8]
00A71109 fadd dword ptr [ecx+8]
00A7110C fstp dword ptr [ebp-18h]
00A7110F fld dword ptr [ebp-18h]
00A71112 fadd dword ptr [edx+8]
00A71115 fstp dword ptr [ebp-0Ch]
00A71118 fld dword ptr [eax+0Ch]
00A7111B mov eax,dword ptr [edi+4]
00A7111E fadd dword ptr [ecx+0Ch]
00A71121 lea ecx,[ebp-14h]
00A71124 fstp dword ptr [ebp-18h]
00A71127 fld dword ptr [ebp-18h]
00A7112A fadd dword ptr [edx+0Ch]
00A7112D fstp dword ptr [ebp-8]
00A71130 cmp ecx,eax
00A71132 jae test+0FBh (0A711ABh)
00A71134 mov edx,dword ptr [edi]
00A71136 cmp edx,ecx
00A71138 ja test+0FBh (0A711ABh)
00A7113A mov esi,ecx
00A7113C mov ecx,dword ptr [edi+8]
00A7113F sub esi,edx
00A71141 cmp eax,ecx
00A71143 jne test+0D4h (0A71184h)
00A71145 sub eax,edx
00A71147 sar eax,4
00A7114A cmp eax,0FFFFFFEh
00A7114F ja test+1D3h (0A71283h)
00A71155 sub ecx,edx
00A71157 inc eax
00A71158 sar ecx,4
00A7115B cmp eax,ecx
00A7115D jbe test+0D4h (0A71184h)
00A7115F mov edx,ecx
00A71161 shr edx,1
00A71163 mov ebx,0FFFFFFFh
00A71168 sub ebx,edx
00A7116A cmp ebx,ecx
00A7116C jae test+0C2h (0A71172h)
00A7116E xor ecx,ecx
00A71170 jmp test+0C4h (0A71174h)
00A71172 add ecx,edx
00A71174 cmp ecx,eax
00A71176 jae test+0CAh (0A7117Ah)
00A71178 mov ecx,eax
00A7117A mov edx,edi
00A7117C call std::vector<math::vector,std::allocator<math::vector> >::reserve (0A718A0h)
00A71181 mov ebx,dword ptr [ebp-28h]
00A71184 mov eax,dword ptr [edi+4]
00A71187 and esi,0FFFFFFF0h
00A7118A add esi,dword ptr [edi]
00A7118C test eax,eax
00A7118E je test+0F6h (0A711A6h)
00A71190 mov edx,dword ptr [esi]
00A71192 mov dword ptr [eax],edx
00A71194 mov ecx,dword ptr [esi+4]
00A71197 mov dword ptr [eax+4],ecx
00A7119A mov edx,dword ptr [esi+8]
00A7119D mov dword ptr [eax+8],edx
00A711A0 mov ecx,dword ptr [esi+0Ch]
00A711A3 mov dword ptr [eax+0Ch],ecx
00A711A6 mov esi,dword ptr [ebp-24h]
00A711A9 jmp test+161h (0A71211h)
00A711AB mov ecx,dword ptr [edi+8]
00A711AE cmp eax,ecx
00A711B0 jne test+143h (0A711F3h)
00A711B2 mov edx,dword ptr [edi]
00A711B4 sub eax,edx
00A711B6 sar eax,4
00A711B9 cmp eax,0FFFFFFEh
00A711BE ja test+1D3h (0A71283h)
00A711C4 sub ecx,edx
00A711C6 inc eax
00A711C7 sar ecx,4
00A711CA cmp eax,ecx
00A711CC jbe test+143h (0A711F3h)
00A711CE mov edx,ecx
00A711D0 shr edx,1
00A711D2 mov esi,0FFFFFFFh
00A711D7 sub esi,edx
00A711D9 cmp esi,ecx
00A711DB jae test+131h (0A711E1h)
00A711DD xor ecx,ecx
00A711DF jmp test+133h (0A711E3h)
00A711E1 add ecx,edx
00A711E3 cmp ecx,eax
00A711E5 jae test+139h (0A711E9h)
00A711E7 mov ecx,eax
00A711E9 mov edx,edi
00A711EB call std::vector<math::vector,std::allocator<math::vector> >::reserve (0A718A0h)
00A711F0 mov esi,dword ptr [ebp-24h]
00A711F3 mov eax,dword ptr [edi+4]
00A711F6 test eax,eax
00A711F8 je test+161h (0A71211h)
00A711FA mov edx,dword ptr [ebp-14h]
00A711FD mov ecx,dword ptr [ebp-10h]
00A71200 mov dword ptr [eax],edx
00A71202 mov edx,dword ptr [ebp-0Ch]
00A71205 mov dword ptr [eax+4],ecx
00A71208 mov ecx,dword ptr [ebp-8]
00A7120B mov dword ptr [eax+8],edx
00A7120E mov dword ptr [eax+0Ch],ecx
00A71211 add dword ptr [edi+4],10h
00A71215 add ebx,10h
00A71218 mov dword ptr [ebp-28h],ebx
00A7121B cmp ebx,9896800h
00A71221 jb test+25h (0A710D5h)
00A71227 call dword ptr [__imp__clock (0A730DCh)]
答案 0 :(得分:4)
我之前已经对这个问题发表了评论。我担心空用户定义的析构函数的存在,这可能会禁用内联。经过一些谷歌搜索后,我更有信心这可能实际上是答案。
This answer描述了一种与您在问题中描述的内容非常接近的情况。在这里,即使设置了operator+
,用户定义的析构函数也会阻止__forceinline
的内联。此处还有一些有用的调试技巧。
还有bug report in microsoft connect。我第一次在discussion of the safeint library on channel9上听说过它。