我知道这可能是一个非常愚蠢的问题,但是到底是什么......
我目前正在尝试实现使用boltzmann分布的soft max动作选择器。
我有点不确定,如果你想使用特定的动作怎么知道怎么知道? 我的意思是这个函数为我提供了概率?,但是如何使用它来选择我想要执行的操作呢?
答案 0 :(得分:1)
对于某些机器学习应用程序,有一点需要将一组原始输出(如来自神经网络)映射到一组概率,归一化为总和为1.
在加强学习中,有一套可用的行动'权重可能需要映射到一组相关概率,然后用于随机选择下一步采取的行动。
Softmax函数通常用于将输出权重映射到一组相应的概率。 A"温度"参数允许调整选择策略,在纯粹的利用(一个"贪婪"策略,总是选择最高权重的行为)和纯粹的探索(每个行为具有相同的被选择概率)之间进行插值。
这是使用Softmax功能的简单示例。每个&#34;动作&#34;对应于此代码中传递的vector<double>
个对象中的一个索引条目。
#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <cmath>
using std::vector;
// The temperature parameter here might be 1/temperature seen elsewhere.
// Here, lower temperatures move the highest-weighted output
// toward a probability of 1.0.
// And higer temperatures tend to even out all the probabilities,
// toward 1/<entry count>.
// temperature's range is between 0 and +Infinity (excluding these
// two extremes).
vector<double> Softmax(const vector<double>& weights, double temperature) {
vector<double> probs;
double sum = 0;
for(auto weight : weights) {
double pr = std::exp(weight/temperature);
sum += pr;
probs.push_back(pr);
}
for(auto& pr : probs) {
pr /= sum;
}
return probs;
}
// Rng class encapsulates random number generation
// of double values uniformly distributed between 0 and 1,
// in case you need to replace std's <random> with something else.
struct Rng {
std::mt19937 engine;
std::uniform_real_distribution<double> distribution;
Rng() : distribution(0,1) {
std::random_device rd;
engine.seed(rd());
}
double operator ()() {
return distribution(engine);
}
};
// Selects one index out of a vector of probabilities, "probs"
// The sum of all elements in "probs" must be 1.
vector<double>::size_type StochasticSelection(const vector<double>& probs) {
// The unit interval is divided into sub-intervals, one for each
// entry in "probs". Each sub-interval's size is proportional
// to its corresponding probability.
// You can imagine a roulette wheel divided into differently-sized
// slots for each entry. An entry's slot size is proportional to
// its probability and all the entries' slots combine to fill
// the entire roulette wheel.
// The roulette "ball"'s final location on the wheel is determined
// by generating a (pseudo)random value between 0 and 1.
// Then a linear search finds the entry whose sub-interval contains
// this value. Finally, the selected entry's index is returned.
static Rng rng;
const double point = rng();
double cur_cutoff = 0;
for(vector<double>::size_type i=0; i<probs.size()-1; ++i) {
cur_cutoff += probs[i];
if(point < cur_cutoff) return i;
}
return probs.size()-1;
}
void DumpSelections(const vector<double>& probs, int sample_count) {
for(int i=0; i<sample_count; ++i) {
auto selection = StochasticSelection(probs);
std::cout << " " << selection;
}
std::cout << '\n';
}
void DumpDist(const vector<double>& probs) {
auto flags = std::cout.flags();
std::cout.precision(2);
for(vector<double>::size_type i=0; i<probs.size(); ++i) {
if(i) std::cout << " ";
std::cout << std::setw(2) << i << ':' << std::setw(8) << probs[i];
}
std::cout.flags(flags);
std::cout << '\n';
}
int main() {
vector<double> weights = {1.0, 2, 6, -2.5, 0};
std::cout << "Original weights:\n";
for(vector<double>::size_type i=0; i<weights.size(); ++i) {
std::cout << " " << i << ':' << weights[i];
}
std::cout << "\n\nSoftmax mappings for different temperatures:\n";
auto softmax_thalf = Softmax(weights, 0.5);
auto softmax_t1 = Softmax(weights, 1);
auto softmax_t2 = Softmax(weights, 2);
auto softmax_t10 = Softmax(weights, 10);
std::cout << "[Temp 1/2] ";
DumpDist(softmax_thalf);
std::cout << "[Temp 1] ";
DumpDist(softmax_t1);
std::cout << "[Temp 2] ";
DumpDist(softmax_t2);
std::cout << "[Temp 10] ";
DumpDist(softmax_t10);
std::cout << "\nSelections from softmax_t1:\n";
DumpSelections(softmax_t1, 20);
std::cout << "\nSelections from softmax_t2:\n";
DumpSelections(softmax_t2, 20);
std::cout << "\nSelections from softmax_t10:\n";
DumpSelections(softmax_t10, 20);
}
以下是输出示例:
Original weights:
0:1 1:2 2:6 3:-2.5 4:0
Softmax mappings for different temperatures:
[Temp 1/2] 0: 4.5e-05 1: 0.00034 2: 1 3: 4.1e-08 4: 6.1e-06
[Temp 1] 0: 0.0066 1: 0.018 2: 0.97 3: 0.0002 4: 0.0024
[Temp 2] 0: 0.064 1: 0.11 2: 0.78 3: 0.011 4: 0.039
[Temp 10] 0: 0.19 1: 0.21 2: 0.31 3: 0.13 4: 0.17
Selections from softmax_t1:
2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 1
Selections from softmax_t2:
2 2 2 2 2 2 1 2 2 1 2 2 2 1 2 2 2 2 2 1
Selections from softmax_t10:
0 0 4 1 2 2 2 0 0 1 3 4 2 2 4 3 2 1 0 1