编辑:正如其他人所建议的那样,我发布了可以运行的整个代码,只需要安装Eigen就可以了。
这是包含三个类的主标题,总之,ADNodeInstance是一个数据持有者,ADNode封装了一个指向ADNodeInstance的指针,ADGraphBuilder是一个包含所有ADNodeInstances并管理它们的主类。:
//
// Created by alex on 07/05/15.
//
#ifndef ADVIBE_STACK_H
#define ADVIBE_STACK_H
#include "memory"
#include "iostream"
#include "fstream"
#include "Eigen/Dense"
typedef Eigen::MatrixXd Matrix;
enum class OPERATOR {
NONE,
ADD,
UNNARY_NEGATION,
MULTIPLY,
DOT_MULTIPLY, // Linear algebra
VERTICAL_CONCATENATION,
TRANSPOSE,
SUBINDEX,
TANH
};
enum TYPE{
CONSTANT,
INPUT,
INPUT_DERIVED,
GRADIENT
};
/**
* Main graph classes
*/
class ADNodeInstance {
public:
TYPE type;
int id;
OPERATOR op;
int dim1, dim2;
std::string name;
Matrix value;
std::vector<std::weak_ptr<ADNodeInstance>> parents;
std::vector<std::weak_ptr<ADNodeInstance>> children;
ADNodeInstance(TYPE type, int id, OPERATOR op, int dim1, int dim2, std::string name) :
type(type), id(id), op(op), dim1(dim1), dim2(dim2), name(name) { };
ADNodeInstance(TYPE type, int id, OPERATOR op, const Eigen::Ref<const Matrix> &value) :
type(type), id(id), op(op), dim1(value.rows()), dim2(value.cols()), value(value) { };
ADNodeInstance(TYPE type, int id, OPERATOR op, int dim1, int dim2) :
type(type), id(id), op(op), dim1(dim1), dim2(dim2) { }
~ADNodeInstance() { };
};
class ADGraphBuilder;
class ADNode{
public:
ADGraphBuilder* builder;
std::shared_ptr<ADNodeInstance> pointer;
ADNode(){};
ADNode(ADGraphBuilder* builder):
builder(builder){};
ADNode(const ADNode& node):
builder(node.builder), pointer(node.pointer){};
~ADNode() {};
inline ADNode getVariable(OPERATOR op, const int dim1, const int dim2) const;
inline ADNode apply(OPERATOR op) const{
ADNode result = getVariable(op, this->pointer->dim1, this->pointer->dim2);
result.pointer->parents.push_back(pointer);
pointer->children.push_back(result.pointer);
return result;
};
inline ADNode tanh() const{
return apply(OPERATOR::TANH);
}
inline ADNode transpose() const{
ADNode result = getVariable(OPERATOR::TRANSPOSE, this->pointer->dim2, this->pointer->dim1);
result.pointer->parents.push_back(pointer);
pointer->children.push_back(result.pointer);
return result;
}
inline ADNode operator-() const{
std::cout << pointer->id << " U " << std::endl;
std::cout << "NEG: " << builder << " - " << pointer->id << std::endl;
ADNode node = apply(OPERATOR::UNNARY_NEGATION);
std::cout << "NEG2: " << builder << " - " << pointer->id << std::endl;
return node;
};
inline ADNode block(const int loc1, const int loc2, const int dim1, const int dim2) const {
ADNode result = getVariable(OPERATOR::SUBINDEX, dim1, dim2);
result.pointer->parents.push_back(pointer);
pointer->children.push_back(result.pointer);
return result;
}
};
class ADGraphBuilder{
protected:
int counter;
public:
std::vector<ADNode> nodes;
ADGraphBuilder():
counter(0){};
~ADGraphBuilder(){};
std::vector<ADNode> gradient(const ADNode& target, const ADNode& param);
ADNode createGradientMessage(const ADNode& child, const ADNode& parent, const ADNode& directGradient);
inline ADNode createInternalVariable(const TYPE type, const OPERATOR op, const int dim1, const int dim2) {
ADNode result = ADNode(this);
result.pointer = std::make_shared<ADNodeInstance>(type, counter++, op, dim1, dim2);
nodes.push_back(result);
return result;
}
inline ADNode createInternalVariable(const TYPE type, const OPERATOR op, const int dim1, const int dim2, const std::string name) {
ADNode result = ADNode(this);
result.pointer = std::make_shared<ADNodeInstance>(type, counter++, op, dim1, dim2, name);
nodes.push_back(result);
return result;
}
inline ADNode createConstantVariable(const Eigen::Ref<const Matrix>& value){
ADNode result = ADNode(this);
result.pointer = std::make_shared<ADNodeInstance>(TYPE::CONSTANT, counter++, OPERATOR::NONE , value);
nodes.push_back(result);
return result;
}
inline ADNode createVariable(const int dim1, const int dim2){
ADNode result = ADNode(this);
result.pointer = std::make_shared<ADNodeInstance>(TYPE::INPUT, counter++, OPERATOR::NONE , dim1, dim2);
nodes.push_back(result);
return result;
}
};
inline ADNode ADNode::getVariable(OPERATOR op, const int dim1, const int dim2) const{
if(this->pointer->type == GRADIENT)
return builder->createInternalVariable(GRADIENT, op, dim1, dim2);
else
return builder->createInternalVariable(INPUT_DERIVED, op, dim1, dim2);
}
/**
* Concatenation
*/
inline ADNode vertCat(const ADNode& lhs, const ADNode& rhs) {
if (lhs.builder != rhs.builder) {
std::cerr << "Can not operate with notes from different builder instances!" << std::endl;
throw 1;
}
if (lhs.pointer->dim2 != rhs.pointer->dim2) {
std::cerr << "Can not perform vertical concatanation on nodes with different size in dimension 2." << std::endl;
throw 2;
}
ADNode result = lhs.builder->createInternalVariable(INPUT_DERIVED, OPERATOR::VERTICAL_CONCATENATION,
lhs.pointer->dim1 + rhs.pointer->dim1, lhs.pointer->dim2);
result.pointer->parents.push_back(lhs.pointer);
result.pointer->parents.push_back(rhs.pointer);
lhs.pointer->children.push_back(result.pointer);
rhs.pointer->children.push_back(result.pointer);
return result;
}
inline ADNode vertCat(const ADNode& lhs, const double rhs) {
return vertCat(lhs, lhs.builder->createConstantVariable(Matrix::Constant(1,1, rhs)));
}
/**
* Joining nodes
*/
ADNode joinNodes(const OPERATOR op, const ADNode& lhs, const ADNode& rhs){
if(lhs.builder != rhs.builder){
std::cerr << "Different builders " << std::endl;
throw 5;
}
ADNode result = lhs.builder->createInternalVariable(INPUT_DERIVED, op , 1, 1);
result.pointer->parents.push_back(lhs.pointer);
result.pointer->parents.push_back(rhs.pointer);
lhs.pointer->children.push_back(result.pointer);
rhs.pointer->children.push_back(result.pointer);
return result;
}
inline ADNode dot(const ADNode& lhs, const ADNode& rhs){
return joinNodes(OPERATOR::DOT_MULTIPLY, lhs, rhs);
}
inline ADNode tanh(const ADNode& lhs){
return lhs.tanh();
}
inline ADNode operator*(const ADNode& lhs, const ADNode& rhs){
std::cout << "MULT: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
ADNode node = joinNodes(OPERATOR::MULTIPLY, lhs, rhs);
std::cout << "MULT2: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
return node;
}
inline ADNode operator+(const ADNode& lhs, const ADNode& rhs){
std::cout << "ADD: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
ADNode node = joinNodes(OPERATOR::ADD, lhs, rhs);
std::cout << "ADD2: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
return node;
}
inline ADNode operator-(const double lhs, const ADNode& rhs){
return rhs.builder->createConstantVariable(Matrix::Constant(1,1,lhs)) + (-rhs);
}
inline int getIndex(const std::vector<std::weak_ptr<ADNodeInstance>> vector, const ADNode& target){
for (int i = 0; i < vector.size(); ++i) {
auto item = vector[i].lock();
if (item->id == target.pointer->id)
return i;
}
return -1;
}
std::vector<ADNode> ADGraphBuilder::gradient(const ADNode &target, const ADNode& params) {
std::vector<ADNode> results{1, this};
// Will contain information about the subtree that effects the target node
bool subtree[nodes.size()];
std::vector<std::vector<ADNode>> childrenGradients(nodes.size(), std::vector<ADNode>{});
std::vector<ADNode> directGradients{nodes.size(), this};
subtree[target.pointer->id] = true;
for (int i = target.pointer->id; i >= 0; i--) {
std::cout << "passed " << i << std::endl;
if(subtree[i]){
// Create a new gradient node summing up all incoming gradient messages
if(i == target.pointer->id) {
directGradients[i] = createConstantVariable(Matrix::Ones(target.pointer->dim1, target.pointer->dim2));
directGradients[i].pointer->name = "Gradient[" + std::to_string(i) + "]";
}
else
directGradients[i] = createInternalVariable(GRADIENT, OPERATOR::ADD,
nodes[i].pointer->dim1, nodes[i].pointer->dim2, std::to_string(i));
for (auto child : childrenGradients[i]) {
directGradients[i].pointer->parents.push_back(child.pointer);
child.pointer->children.push_back(directGradients[i].pointer);
}
for (auto item : nodes[i].pointer->parents) {
auto parent = item.lock();
// Check if parent is not constant
if(parent->type != CONSTANT) {
// Mark parent as node to traverse
subtree[parent->id] = true;
// Create the corresponding message as a new node and add it to the parents list
// of gradient incoming messages
ADNode capsule = ADNode(this);
capsule.pointer = parent;
//childrenGradients[parent->id].push_back(createConstantVariable(Matrix::Constant(1,1,1)));
std::cout << "Capsule: " << capsule.builder << " - " << capsule.pointer->id << std::endl;
childrenGradients[parent->id].push_back(createGradientMessage(nodes[i], capsule, directGradients[i]));
std::cout << "Capsule: " << capsule.builder << " - " << capsule.pointer->id << std::endl;
//childrenGradients[parent->id].back().pointer->name = "Gradinet[" + childrenGradients[parent->id].back().pointer->name + "]";
}
}
}
}
return results;
};
ADNode ADGraphBuilder::createGradientMessage(const ADNode& child, const ADNode& parent, const ADNode& directGradient){
switch (child.pointer->op){
case(OPERATOR::NONE):
std::cout << "Making gradient of an INPUT node? Are you serious?";
return createInternalVariable(CONSTANT, OPERATOR::NONE, child.pointer->dim1,child.pointer->dim2,
std::to_string(child.pointer->id) + "->" + std::to_string(parent.pointer->id));
case(OPERATOR::ADD):
return directGradient;
case(OPERATOR::UNNARY_NEGATION):
return -directGradient;
case(OPERATOR::DOT_MULTIPLY):
if(child.pointer->parents.size() == 2){
int index = getIndex(child.pointer->parents, parent);
ADNode otherParent(child.builder);
otherParent.pointer = child.pointer->parents[1-index].lock();
if(index == 0)
return dot(directGradient,otherParent.transpose());
else
return dot(otherParent.transpose(),directGradient);
}
else{
std::cerr<< "Unimplemented" << std::endl;
return NULL;
}
case(OPERATOR::VERTICAL_CONCATENATION): {
int index = getIndex(child.pointer->parents, parent);
int start = 0;
for (int i = 0; i < index; ++i) {
auto parent = child.pointer->parents[i].lock();
start += parent->dim1;
}
return directGradient.block(start, 0, parent.pointer->dim1, parent.pointer->dim2);
}
case(OPERATOR::TANH):
return directGradient * child * (1 - child);
}
}
#endif //ADVIBE_STACK_H
主程序:
#include <iostream>
#include "Eigen/Dense"
#include "stack.h"
int main() {
ADGraphBuilder builder;
ADNode param1 = builder.createVariable(2,1);
ADNode param2 = builder.createVariable(2,3);
Matrix sad(1,1);
sad << 2;
ADNode h = tanh(dot(param2, vertCat(param1,1)));
for (int i = 0; i < 0; ++i) {
h = tanh(dot(param2,vertCat(h,1)));
}
ADNode e = dot(h.transpose(), h);
auto grad = builder.gradient(e, param2);
std::cout << "(" << e.pointer->dim1 << ", " << e.pointer->dim2 << ")" << std::endl;
return 0;
}
当我运行它时,我得到的输出如下:
passed 7
Capsule: 0x7ffffff7a750 - 6
Capsule: 0x7ffffff7a750 - 6
Capsule: 0x7ffffff7a750 - 5
Capsule: 0x7ffffff7a750 - 5
passed 6
Capsule: 0x7ffffff7a750 - 5
Capsule: 0x7ffffff7a750 - 5
passed 5
Capsule: 0x7ffffff7a750 - 4
5 U
NEG: 0x7ffffff7a750 - 5
NEG2: 0x7ffffff7a750 - 5
ADD: 0x7ffffff7a750 - 15
ADD2: 0x7ffffff7a750 - 15
MULT: 0 - 5
Different builders
terminate called after throwing an instance of 'int'
Process finished with exit code 134
根据这个意思,这意味着在函数 ADGraphBuilder :: createGradientMessage 中表达 TANH 由于某种原因表达式 child *(1 - child)< / strong>在评估右操作数后,子变量是垃圾收集还是我不知道?
欢迎提出任何意见和建议,感谢Azad和Whoz到目前为止的讨论。