我遇到了一个非常奇怪的事情,两天前我尝试调试代码。
我在Windows 7 64位操作系统上运行代码。总的来说,我通过知道输入信号来计算数学模型,该输入信号将应用于控制算法SOOP
。主要的计算是正常的,但在soop
函数中做同样的事情我有接近但不完全相同的结果。为什么?
我也厌倦了float
而且得到了相同的结果。
如果我计算的函数不在主函数中,double
是否舍入?
主:
#include <unistd.h>
#include <iostream>
#include <windows.h>
#include <fstream>
#include <math.h>
#include "model.h"
#include "SOOP.h"
using namespace std;
int main( void )
{
ofstream myfile;
myfile.open ("modelTest.txt");
stateE current_state; // = new stateE;
initstateE(current_state);
current_state.variables[0] = 0.0;
current_state.variables[0] = -3.0*PI/2.0;
cout << "program init" << endl;
// ==============Model testing
myfile << current_state.variables[0] <<", "<< current_state.variables[2] <<", "<<current_state.reward << ", "<< fmod((current_state.variables[0])+ PI, 2.0 * PI) -PI <<endl;
double utmp = -10.0;
for (int i=0;i<3;i++)
{
current_state.variables[0] = -3.0*PI/2.0;
copyStateE(current_state, nextstateE(current_state, utmp));
utmp = utmp+10.0;
myfile << current_state.variables[0] <<", "<< current_state.variables[2] <<", "<<current_state.reward << ", "<< fmod((current_state.variables[0])+ PI, 2.0 * PI) -PI <<endl;
}
// ==============SOOP testing
initstateE(current_state);
current_state.variables[0] = -3.0*PI/2.0;
double commnadSignal[initlen];
for(int i=0; i<1; i++)
{
memcpy(commnadSignal,soop(current_state),sizeof(double)*initlen);
copyStateE(current_state,nextstateE(current_state, commnadSignal[0]));
}
myfile.close();
return 0;
}
模型库:
stateE nextstateE(stateE s, double votlageSignal) {
std::cout<< "in nextstateE"<< std::endl;
stateE newstateE;// = (stateE*)malloc(sizeof(stateE));
memcpy(newstateE.variables, s.variables, sizeof(double) * nbHiddenstateEVariables);
newstateE.isTerminal = s.isTerminal;
std::cout << " v "<< votlageSignal << " l "<< s.variables[0] << std::endl;
double Vm = votlageSignal;
double lambda = s.variables[0];
double dLambda = s.variables[1];
double theta = s.variables[2];
double dTheta = s.variables[3];
double thetaNext = theta + timeStep*dTheta;
double dThetaNext = dTheta + timeStep*( (1/(aP*cP-pow(bP,2)*pow(cos(lambda),2))) *(-bP*cP*sin(lambda)*pow(dLambda,2)+bP*dP*sin(lambda)*cos(lambda)-cP*eP*dTheta+cP*fP*Vm) );
double lambdaNext = lambda + timeStep*dLambda;
double dLambdaNext = dLambda + timeStep*( (1/(aP*cP-pow(bP,2)*pow(cos(lambda),2))) *(aP*dP*sin(lambda)-pow(bP,2)*sin(lambda)*cos(lambda)*pow(dLambda,2)-bP*eP*cos(lambda)*dTheta+bP*fP*cos(lambda)*Vm) );
//========== Normalization of lambda and theta
if (lambdaNext>0)
lambdaNext = fmod(lambdaNext+PI, 2.0*PI)-PI;
else
lambdaNext = fmod(lambdaNext-PI, 2.0*PI)+PI;
if (thetaNext>0)
thetaNext = fmod(thetaNext+PI, 2.0*PI)-PI;
else
thetaNext = fmod(thetaNext-PI, 2.0*PI)+PI;
newstateE.variables[0] = lambdaNext;
newstateE.variables[1] = dLambdaNext;
newstateE.variables[2] = thetaNext;
newstateE.variables[3] = dThetaNext;
double newReward;
if(s.isTerminal) {
newReward = 0.0;
} else {
newReward = 1 - (pow(lambdaNext,2.0)*qReward + pow(votlageSignal,2.0)*rReward)/(pow(PI,2.0)*qReward+ pow(maxVoltage,2.0)*rReward);
}
newstateE.reward=newReward;
return newstateE;
}
void copyStateE(stateE& destination, stateE original)
{
initstateE(destination);
destination.variables[0] = original.variables[0]; // lambda
destination.variables[1] = original.variables[1]; // dLambda
destination.variables[2] = original.variables[2]; // theta
destination.variables[3] = original.variables[3]; // dTheta
destination.isTerminal = 0;
destination.reward = original.reward;
}
控制器库使用模型库:
double * soop(stateE& s0)
{
ofstream myfile;
myfile.open ("soopTest.txt");
bool stopFlag = false;
unsigned int counter = 0;
unsigned int currentBudget = 0;
unsigned int listLenght = 0;
unsigned int maxLengthSequence = 0;
double tmpU = -10.0;
for (int i=0;i<3;i++)
{
double sPtmp[initlen] = {0};
unsigned int stmp[initlen] = {0};
stateE newstateE[initlen];
stateE tmpE;
sPtmp[0] = ((double)i)/3.0;
stmp[0] = 1;
initstateE(tmpE);
tmpE.variables[0]=-3.0*PI/2.0;
copyStateE(tmpE, nextstateE(tmpE , tmpU ));
tmpU = tmpU + 10.0;
newstateE[0] = nextstateE(s0, unnormU(middleOfBox((double)i/3.0, stmp[0])) );
myfile << "U norm "<< middleOfBox((double)i/3.0, stmp[0])<< " Unorm "<<unnormU(middleOfBox((double)i/3.0, stmp[0])) << endl;
myfile <<"alp " << tmpE.variables[0] << " th " << tmpE.variables[2] << " R " << tmpE.reward << endl;
myfile <<"alp " << newstateE[0].variables[0] << " th " << newstateE[0].variables[2] << " R " << newstateE[0] .reward << endl;
myfile << endl;
}
myfile.close();
}
两个文本文件modelTest.txt
和soop Test.txt
应该显示相同的结果,因为我在主要和soop中应用相同的输入参数(s0和[-10,0,10])功能。但是我在文本文件中得到了不同的结果。
soop Test
档案:
U norm 0.166667 Unorm -10
alp 1.5708 th 0 R 0.75
alp 1.5708 th 0 R 0.75
U norm 0.5 Unorm 0
alp 1.5708 th 0 R 0.75
alp 1.5708 th 0 R 0.75
U norm 0.833333 Unorm 10
alp 1.5708 th 0 R 0.75
alp 1.5708 th 0 R 0.75
modelTest
档案:
alp 1.5708, th 0, R 0.75 (for s0 and -10)
alp 1.57084, th -0.000115852, R 0.749986 (for s0 and 0)
alp 1.57088, th -0.000230942, R 0.749972 (for s0 and 10)
答案 0 :(得分:1)
我的错误,在main
我没有初始化current_state
的所有状态(只有一个角度)。 initStateE(current_state)
循环中缺少main
。
下一个错误,即voltageSignal
的奖励因子,'rReward = 0',因此奖励不会随控制值的变化而变化。
最大的错误是假设系统角度状态参数在第一次模拟后会发生变化。我正在使用Euler离散对于 f(Q(t),U(t)) - 二阶微分方程,因此控制信号仅对第一次迭代中的一阶导数产生影响它在第二次迭代后有效。
P_{k+1} = P_k + daltaT * Q_k
Q_{k+1} = Q_k + deltaT * f(Q_k, U_k)
其中deltaT是采样时间。