我有四个位向量的集合,例如:
b1 = 00001010
b2 = 10100111
b3 = 10010010
b4 = 10111110
我想得到这样一组掩码:它们分别标出恰好在 0、1、2、3 或 4 个给定位向量中被置位的那些位。因此 m0 是在四个位向量中都未被置位的位的掩码,m3 是恰好在三个位向量中被置位的位的掩码,依此类推:
m0 = 01000000
m1 = 00000001
m2 = 00111100
m3 = 10000000
m4 = 00000010
使用按位运算符查找这些掩码的最快方法是什么?
我认为对于 m0 和 m4,下面的写法所需的操作最少:
// m0 keeps the bits that are set in none of b1..b4; m4 keeps the bits set in all four.
m0 = ~(b1 | b2 | b3 | b4) // 4 ops
m4 = b1 & b2 & b3 & b4 // 3 ops
对于其他选项,我不太确定我的方法操作最少:
// m1: one pair holds exactly one set bit while the other pair holds none.
m1 = ((b1 ^ b2) & ~(b3 | b4)) | (~(b1 | b2) & (b3 ^ b4)) // 9 operations
// m2: for at least one of the three ways to split the inputs into two pairs,
// each pair holds exactly one set bit.
m2 = ((b1 ^ b2) & (b3 ^ b4)) | ((b1 ^ b3) & (b2 ^ b4)) | ((b1 ^ b4) & (b2 ^ b3)) // 11 operations
// m3: one pair is fully set and the other pair holds exactly one set bit.
m3 = ((b1 ^ b2) & (b3 & b4)) | ((b1 & b2) & (b3 ^ b4)) // 7 operations
这是计算这些掩码的最快方法吗?还是说可以用更少的操作更快地完成?
对于大多数情况,我只需要其中一个或几个掩码,而不是全部。
(注意,实际上我将为64或128位向量执行此操作。它可能无关紧要,但我在32位x86平台上使用C进行此操作。)
答案 0 :(得分:10)
全部掩码共需 14 次操作。
思路是先把 min = x & y 和 max = x | y 当作一次“条件交换”,对各个位进行排序,这需要 10 次操作。之后提取各个掩码只需再花 4 次操作。
// Sorting network over the four inputs, applied to every bit position at once:
// x & y plays the role of min(x, y) and x | y the role of max(x, y).
// After the three stages e1 <= e2 <= e3 <= e4 holds bitwise, so e1 is set where
// all four inputs are set, e2 where at least three are, e3 where at least two
// are, and e4 where at least one is.
// Split in lower and upper half
var c1 = b1 & b2;
var c3 = b1 | b2;
var c2 = b3 & b4;
var c4 = b3 | b4;
// Sort within each half
var d1 = c1 & c2; // (b1 & b2) & (b3 & b4)
var d2 = c1 | c2; // (b1 & b2) | (b3 & b4)
var d3 = c3 & c4; // (b1 | b2) & (b3 | b4)
var d4 = c3 | c4; // (b1 | b2) | (b3 | b4)
// Sort middle
var e1 = d1; // (b1 & b2) & (b3 & b4)
var e2 = d2 & d3; // ((b1 & b2) | (b3 & b4)) & ((b1 | b2) & (b3 | b4))
var e3 = d2 | d3; // ((b1 & b2) | (b3 & b4)) | ((b1 | b2) & (b3 | b4))
var e4 = d4; // (b1 | b2) | (b3 | b4)
// Extract masks: two neighbouring "at least k" levels differ exactly where the
// count equals the lower of the two thresholds.
var m4 = e1; // (b1 & b2) & (b3 & b4)
var m3 = e2 ^ e1; // (((b1 & b2) | (b3 & b4)) & ((b1 | b2) & (b3 | b4))) ^ ((b1 & b2) & (b3 & b4))
var m2 = d3 ^ d2; // The same as e3 ^ e2, saves two operations if only m2 is required
var m1 = e4 ^ e3; // ((b1 | b2) | (b3 | b4)) ^ (((b1 & b2) | (b3 & b4)) | ((b1 | b2) & (b3 | b4)))
var m0 = ~e4; // ~((b1 | b2) | (b3 | b4))
(代码在C#中,但将其转换为C是微不足道的。)
如果你使用这个代码只计算一些掩码,只需删除不会影响结果的行(一个不错的编译器应该自动执行此操作)。性能也不错:
m4:3次操作
答案 1 :(得分:8)
首先考虑位向量长度为1的普通情况,即它们只是单个位。你真正要做的是计算这些位的数量;将计数映射到您想要的位掩码是一个相对简单的练习。
诀窍是找到一种方法仅使用按位运算(即AND,OR,XOR和NOT)计数位,结果计数的每一位最终都在一个单独的变量中。如果你能做到这一点,那么你可以同时使用适合变量的位数。此技术称为bit-slicing或SIMD-within-a-register(SWAR)。
所以你基本上要做的是实现 n - 输入单位binary adder(在你的情况下, n = 4)使用按位逻辑运算。幸运的是,数字电路设计人员已经对这个问题进行了广泛的研究。
一般的解决方案是维护一个由 k 个位向量组成的数组 t_1, t_2, ..., t_k(其中 2^k > n)来存储计数的各个二进制位,并把每个输入位向量逐个加到计数上:
// initialize counts to zero
int count[k];
for (int j = 0; j < k; j++) count[j] = 0;
// count input bits: add every input vector to the bit-sliced counter with a
// ripple-carry add, one bit-plane (count[j]) at a time
for (int i = 0; i < n; i++) {
    int carry = input[i];
    for (int j = 0; j < k && carry != 0; j++) {
        // index with j, the current plane; count[k] would be one past the end
        int temp = count[j];
        count[j] = carry ^ temp;   // sum bit of this plane
        carry = carry & temp;      // positions that carry into plane j + 1
    }
    // XXX: if carry != 0 here, some of the counts have overflowed
}
然后你可以从计数中提取你的位掩码:
// masks[i] ends up with a 1 in every bit position whose count equals i
int masks[n+1];
for (int i = 0; i <= n; i++) {
    masks[i] = ~0; // initialize all mask bits to 1
    for (int j = 0; j < k; j++) {
        // keep only the positions whose j-th count bit matches bit j of i
        masks[i] &= (i & (1 << j) ? count[j] : ~count[j]);
    }
}
当然,如果输入的数量很小且固定,我们可以针对该特定值优化代码。例如,对于 n = 4,我们可以使用:
// count input bits, store counts in bit-planes c0, c1, c2
// c0 is the parity of the four inputs (the 1s bit of the count)
int c0 = b0 ^ b1 ^ b2 ^ b3;
// c2 is set only where all four inputs are set (count 4 = binary 100)
int c2 = b0 & b1 & b2 & b3;
// the three OR-ed terms are set where at least two inputs are set; masking
// with ~c2 clears the 2s bit again for a count of 4
int c1 = ((b0 & b1) | ((b0 ^ b1) & b2) | ((b0 ^ b1 ^ b2) & b3)) & ~c2;
// build masks from bit-planes
int m0 = ~c0 & ~c1 & ~c2;
int m1 = c0 & ~c1 & ~c2;
int m2 = ~c0 & c1 & ~c2;
int m3 = c0 & c1 & ~c2;
int m4 = c2; // XXX: count cannot be more than 4
完全天真的编译器会从此代码生成 23 个 AND / OR / XOR 运算和 9 个 NOT。但是,一个不错的编译器应该会缓存 ~c0、~c1 和 ~c2 的值,最多可节省 6 个 NOT;它可能还会缓存一些重复的子表达式,如 b0 & b1、b0 ^ b1、b0 ^ b1 ^ b2、~c1 & ~c2 和 c1 & ~c2,最多可节省 6 个 AND / XOR。这样总计 17 个 AND / OR / XOR 加 3 个 NOT = 20 个操作,这与 Jonathan Mee 的 19 次操作的解决方案非常接近。
事实上,我们还可以做得更好:注意到我们实际上并不需要计算 c1,而可以改用 c12 = c1 | c2。我们还可以注意到,只要 c0(或 c1)被置位,c2 就不可能被置位,从而进一步优化掩码的构建:
// count input bits, store counts in bit-planes c0, (c1 = c12 & ~c2), c2
int c0 = b0 ^ b1 ^ b2 ^ b3;
int c2 = b0 & b1 & b2 & b3;
// c12 is set where at least two inputs are set (count 2, 3 or 4)
int c12 = ((b0 & b1) | ((b0 ^ b1) & b2) | ((b0 ^ b1 ^ b2) & b3));
// build masks from bit-planes
int m0 = ~c0 & ~c12;
int m1 = c0 & ~c12; // c0 implies ~c2
int m2 = ~c0 & c12 & ~c2;
int m3 = c0 & c12; // c0 implies ~c2
int m4 = c2; // c2 implies ~c0 & ~c1
对于一个天真的编译器,这是19个AND / OR和5个NOT,但是普通的子表达式优化应该将其减少到15个AND / OR和3个NOT,总共 18个ops 。
(当然,现代处理器的实际性能提升将来自指令重新排序以减少管道停滞。我怀疑这段代码在这方面应该做得相当好:虽然掩码显然取决于计数和计数输入,在计数或掩码中没有内部依赖关系,因此应该有足够的空间进行重新排序。)
更新 2014年11月23日:我之前的代码未经测试,并且在c1
/ c12
的表达式中包含一个错误。我现在已经修复了它,甚至设法让它稍微更优化,在公共子表达式消除后节省了一个操作(但是为一个天真的编译器花费了一个额外的操作)。尽管如此,它仍然使用比CodesInChaos基于排序的解决方案更多的操作。
答案 2 :(得分:7)
由于m2
似乎更难计算,你可以写:
// Every bit position belongs to exactly one of m0..m4, so m2 is whatever the
// other four masks leave uncovered (they must already have been computed).
m2 = ~(m0 | m1 | m3 | m4) // 4 ops
答案 3 :(得分:7)
假设下面的(低效)C ++搜索代码是正确的,那么操作最少的表达式,不允许使用临时变量,如下所示:
(4 ops): m0 = ~(((b3 | b2) | b1) | b0)
(7 ops): m1 = ((((b1 & b0) | b3) | b2) ^ (((b3 & b2) | b1) | b0))
(7 ops): m2 = (((b3 | b0) & (b2 | b1)) ^ ((b2 & b1) | (b3 & b0)))
(7 ops): m3 = (((((b3 | b2) & b1) & b0) ^ (b3 & b2)) & (b1 | b0))
(3 ops): m4 = (((b3 & b2) & b1) & b0)
我只用手检查了m1。这是一个有趣的问题,但您确定这是您软件的瓶颈吗?即使是这样,使用最少的操作的实现也可能不是最快的,例如,我不知道但是NOT可能比其他操作更快。
// A program to search for boolean expressions that determine
// whether n of bools x0,..x3 are true, made up of a minimal
// number of ands, ors, xors and nots.
// There are 2**4=16 possible assignments of x0,..x3
// so there are 2**16 functions (assignments) -> (output)
// thus each map can be identified as an integer
// fun[i] will be the list of ids of all functions that
// can be represented with <= n operations
// options
const int max_ops = 7; // max number of ops to search to
#include <tchar.h>
#include <vector>
#include <set>
#include <iostream>
#include <bitset>
#include <map>
#include <string>
using namespace std;
// Kind of step recorded for a function: an input literal, or the operator
// that was applied last to produce it.
typedef enum {
LITERAL,
NOT,
AND,
OR,
XOR
} OpType;
// One recorded construction step. For LITERAL, `first` is the input index and
// `second` is -1; for NOT, `first` is the operand's function id and `second`
// is -1; for AND/OR/XOR both fields are operand function ids.
typedef struct {
int first;
int second;
OpType type;
} op;
// Builds the truth-table id of the function "exactly k of the 4 inputs are
// true". Bit j of the result is set when assignment j (one bit per input)
// contains exactly k set bits.
int get_count_fn(int k)
{
    int table = 0;
    for (int assignment = 0; assignment < 16; ++assignment)
    {
        // population count of the 4-bit assignment
        int ones = 0;
        for (int bit = 0; bit < 4; ++bit)
        {
            ones += (assignment >> bit) & 1;
        }
        if (ones == k)
        {
            table |= 1 << assignment;
        }
    }
    return table;
}
// Records that the function with id `result` can be obtained by applying
// `type` to the operands `first` and `second`. An entry already stored for
// `result` is overwritten.
void add_triple(map<int, op> & src, int first, int second, OpType type, int result)
{
    op & entry = src[result];
    entry.first = first;
    entry.second = second;
    entry.type = type;
}
// Returns the smallest index n for which src[n] has an entry for val,
// or -1 when no level contains it.
int get_first(const vector<map<int, op>> & src, int val)
{
    unsigned int level = 0;
    while (level < src.size())
    {
        if (src[level].count(val) != 0)
        {
            return level;
        }
        ++level;
    }
    return -1;
}
// Renders the expression recorded for function id `val` as text.
// Looks up the first level that contains `val`, then recursively prints the
// operands of the step stored there. Returns "?" when `val` was never
// recorded at any level.
string display_retrace(const vector<map<int, op>> & src, int val)
{
    // find the op leading to it
    const int n = get_first(src, val);
    if (n < 0)
    {
        // get_first reports "not found" as -1; indexing src[-1] would be
        // undefined behaviour, so emit a visible placeholder instead
        return string("?");
    }
    const op & o = src[n].find(val)->second;

    // print it out, recursively
    const char * infix = nullptr;
    switch (o.type)
    {
    case LITERAL:
        return string("b") + to_string(o.first);
    case NOT:
        return string("~") + display_retrace(src, o.first);
    case AND:
        infix = " & ";
        break;
    case OR:
        infix = " | ";
        break;
    case XOR:
        infix = " ^ ";
        break;
    default:
        // unknown step kind: keep the original behaviour of an empty result
        return string();
    }
    return string("(") + display_retrace(src, o.first) + string(infix) +
        display_retrace(src, o.second) + string(")");
}
int _tmain(int argc, _TCHAR* argv[])
{
int all_on = (1 << 16) - 1;
vector<int> countFuns;
vector<bool> foundCountFuns;
vector<map<int, op>> src;
cout << "The `counting' functions we seek are:\n";
for (int k = 0; k <= 4; k++)
{
int cf = get_count_fn(k);
cout << std::bitset<16>(cf) << "\n";
countFuns.push_back(cf);
foundCountFuns.push_back(false);
}
for (int i = 0; i <= max_ops; i++)
{
src.push_back(map<int, op>());
}
// add all the literals to the list for 0 operations
for (int i = 0; i < 4; i++)
{
int x = 0;
for (int j = 0; j < 16; j++)
{
if (j & (1 << i))
{
x |= (1 << j);
}
}
add_triple(src[0], i, -1, LITERAL, x);
}
// iterate over the number n of operators
for (int n = 1; n <= max_ops; n++)
{
// iterate over i,j with i+j=n-1
for (int i = 0; i <= n - 1; i++)
{
int j = n - i - 1;
// add all combinations of all vectors to the list for n
for (auto pi = src[i].begin(); pi != src[i].end(); pi++)
{
for (auto pj = src[j].begin(); pj != src[j].end(); pj++)
{
int xi = pi->first;
int xj = pj->first;
add_triple(src[n], xi, xj, OR, xi | xj);
add_triple(src[n], xi, xj, AND, xi & xj);
add_triple(src[n], xi, xj, XOR, xi ^ xj);
}
}
}
// also add the "nots" from n-1
for (auto pprev = src[n - 1].begin(); pprev != src[n - 1].end(); pprev++)
{
int xprev = pprev->first;
add_triple(src[n], xprev, -1, NOT, all_on - xprev);
}
cout << "Functions with " << n << " operators: size is " << src[n].size() << " ---\n";
// search for the functions we are interested in
for (unsigned int k = 0; k < countFuns.size(); k++)
{
if (!foundCountFuns[k] && src[n].find(countFuns[k]) != src[n].end())
{
cout << "Found count function " << k << ":\n";
cout << "m" << k << " = " << display_retrace(src, countFuns[k]) << "\n";
foundCountFuns[k] = true;
}
}
}
system("pause");
return 0;
}
答案 4 :(得分:5)
请原谅C ++。它只是使输出更容易。
// Sample inputs taken from the question.
const int b1 = 0b00001010;
const int b2 = 0b10100111;
const int b3 = 0b10010010;
const int b4 = 0b10111110;
// 4 operations: the pairwise XORs and ANDs below are shared by m1, m3 and m4
const int x12 = b1 ^ b2;
const int x34 = b3 ^ b4;
const int a12 = b1 & b2;
const int a34 = b3 & b4;
const int m0 = ~(b1 | b2 | b3 | b4); // 4 operations
// one pair fully set, the other pair holding exactly one set bit
const int m3 = ((x12) & (a34)) | ((a12) & (x34)); // 3 operations
// x12 ^ x34 is set for an odd count (1 or 3); removing m3 leaves count 1
const int m1 = ((x12) ^ (x34)) & ~m3; // 3 operations
const int m4 = a12 & a34; //1 operation
// whatever the other four masks do not cover
const int m2 = ~(m0 | m1 | m3 | m4); //4 operations
// bitset<8> prints only the low 8 bits of each int
cout << bitset<8>(m0) << endl << bitset<8>(m1) << endl << bitset<8>(m2) << endl << bitset<8>(m3) << endl << bitset<8>(m4) << endl;
答案 5 :(得分:5)
以下是按同时计算的面具数量排序的结果。
如果单独计算,每个掩模最多需要7次操作:
// Shared pair terms: a?? is set where both inputs of the pair are set,
// r?? where at least one of them is.
a01 = b0 & b1
a23 = b2 & b3
r01 = b0 | b1
r23 = b2 | b3
// The op counts below include the pair terms that the mask actually uses.
m0 = ~(r01 | r23) // 4 ops
m1 = (a01 | r23) ^ (r01 | a23) // 7 ops
m2 = (r01 & r23) ^ (a01 | a23) // 7 ops
m3 = (r01 & r23) & (a01 ^ a23) // 7 ops
m4 = a01 & a23 // 3 ops
这里有很多公共子表达式,所以如果你需要同时得到任意一对掩码,最多需要 10 次操作(如果其中包含 m0 或 m4,则更少)。
但是对某些对的计算可能会进一步优化:
// m2,m3 in 9 ops
// (the four pair terms a01, a23, r01, r23 plus the five operations below)
t1 = r01 & r23
t2 = a01 ^ a23
m2 = t1 ^ (a23 | t2) // a23 | t2 has the same value as a01 | a23
m3 = t1 & t2
// m1,m3 in 9 ops
// (the four pair terms a01, a23, r01, r23 plus the five operations below)
t1 = r01 ^ r23
t2 = a01 ^ a23
t3 = t1 ^ t2 // set exactly where the count is odd (1 or 3)
m1 = t1 & t3
m3 = t2 & t3
同样的方法也适用于掩码三元组。只有一个三元组(m1、m2、m3)可以用“对位排序”("sort the bits")方法更快地算出,只需 11 次操作,这是最优的。
如果您需要同时使用4个或5个掩码,我相信,“排序”方法会给出最佳结果。
如果我们再允许一次操作(NAND),则可以进行更多优化。实际上,最后一个代码片段的最后3行可以用2个NAND代替:
// m1,m3 in 8 ops (NAND is a single op)
// Here "NAND" means an AND with one operand complemented (x & ~y), counted
// as one operation; a01, a23, r01, r23 are the four shared pair terms.
t1 = r01 ^ r23
t2 = a01 ^ a23
m1 = t1 & ~t2
m3 = ~t1 & t2
并且(m1,m2,m3)三倍也可以用NAND进行优化:
// m1,m2,m3 in 10 ops (NAND is a single op)
// Here "NAND" means an AND with one operand complemented (~x & y or x & ~y).
x01 = b0 ^ b1
x23 = b2 ^ b3
a01 = b0 & b1
a23 = b2 & b3
t1 = x01 ^ x23 // parity of all four inputs: set for counts 1 and 3
t2 = a01 ^ a23 // exactly one of the two pairs is fully set
m1 = t1 & ~t2
m2 = ~t1 & (x23 ^ t2)
m3 = t1 & t2
再添加一项操作m4 = a01 & a23
,以便在11次操作中获取除m0
以外的所有掩码。
这些结果是在穷举搜索代码的帮助下获得的(见下文)。此代码使用一些简化的假设,以便能够足够快地运行。这些假设并不明显,这使得这些代码不是一个很好的工具来证明结果的最优性。至少单独掩码的结果是最佳的,code from other answer证明了这一点。掩码对和三元组结果的最优性由我的代码“证明”,这意味着你很可能不能更快地完成它。
这是代码(C ++ 14或C ++ 11加二进制文字):
/* Search for minimal logical expression (using &|^ operations) for bits set
* exactly N times (in a group of 4 bits).
*
* Uses brute force approach to get one or two expressions for one or two
* values of N at once. To make it possible getting results in reasonable time
* some simplifications were made:
* - first 4 operations pre-defined: &| or ^& or ^| for 2 pairs of input values
* - number of ops limited, if no result found within limit, print "impossible"
* - no attempts to perform operation on 2 expr with the same left and the same
* right parts
* - unused nodes are not allowed (to minimize number of duplicate attempts)
* - no attempt to use "not" (except for "m0")
*
* Also these optimizations were tried (with no significant effect):
* - no more than 2 different ops on same pair of sub-expressions
* - do not apply same op on same pair of sub-expressions more than once
*
* operation set may be extended by "nand" (kNAnd option)
*/
#include <algorithm>
#include <array>
#include <iostream>
#include <bitset>
#include <thread>
#include <mutex>
#include <cassert>
using namespace std;
// Compile-time settings of the search.
enum {
// capacity of the node array held by a DAG
kMaxSize = 17,
// number of target masks (m0..m4)
kNTargets = 5,
// number of input literals (b0..b3)
kNInputs = 4,
// child index stored in literal nodes, which have no operands
kNil = 255,
// non-zero lets the search also try the two AND-with-complement operations
kNAnd = 0,
};
// Operation codes. They start at kNInputs so that a node's `op` field can
// hold either a literal index (0..kNInputs-1) or one of these codes.
enum Op {
OpAnd = kNInputs,
OpOr,
OpXor,
// ~left & right
OpNAndL,
// left & ~right
OpNAndR,
};
// Printable form of each literal index and of the three plain operators,
// indexed by a node's `op` value.
array<const char*, kNInputs + 3> g_op_str {
"b0", "b1", "b2", "b3",
" & ", " | ", " ^ ",
};
// Truth table of each target mask over the 16 input assignments. The columns
// are ordered by set-bit count, as spelled out in the digit string below.
array<unsigned, kNTargets> g_target_masks {
0b0111111111111111, // gives correct result only after additional "not"
0b0111100000000000,
0b0000011111100000,
0b0000000000011110,
0b0000000000000001,
};
// 0111122222233334
// Truth tables of the inputs b0..b3 in the same column order; the digit
// string above gives the number of inputs set in each column.
array<unsigned, kNInputs> g_literal_vals {
0b0100011100001111,
0b0010010011010111,
0b0001001010111011,
0b0000100101111101,
};
// Bit i is set when mask m<i> was requested on the command line.
unsigned g_targets = 0;
// A Find starts with this as its best score; only cheaper DAGs are accepted.
unsigned g_score_limit = 0;
// Serialises result printing between the worker threads.
mutex g_print_mutex;
// Returns the position of the first element of `c` equal to `t`, or the
// number of elements in `c` when there is no such element.
template<typename C, typename T>
ptrdiff_t findIndex(C c, T t)
{
    const auto first = std::begin(c);
    return std::find(first, std::end(c), t) - first;
}
// One node of the expression DAG: either an input literal or an operation
// applied to two earlier nodes.
struct DAGNode
{
    unsigned value;   // truth table of the node over the 16 assignments
    uint8_t op;       // literal index, or an operation code
    bool l_not;       // left operand is complemented
    bool r_not;       // right operand is complemented
    uint8_t left;     // index of the left operand node
    uint8_t right;    // index of the right operand node
    uint8_t use_cnt;  // usage counter maintained by the search

    // Resets only the usage counter.
    void clear()
    {
        use_cnt = 0;
    }

    // Turns this node into input literal `lit` with truth table `v`.
    void setLit(const uint8_t lit, const unsigned v)
    {
        op = lit;
        value = v;
        left = right = kNil;
        l_not = r_not = false;
        use_cnt = 0;
    }
};
struct DAG
{
array<DAGNode, kMaxSize> nodes;
unsigned size;
unsigned score;
void print(ostream& out, size_t ind)
{
auto& node = nodes[ind];
if (node.op < kNInputs)
{
out << g_op_str[node.op];
}
else
{
out << '(';
if (node.l_not) out << '~';
print(out, node.left);
out << g_op_str[node.op];
if (node.r_not) out << '~';
print(out, node.right);
out << ')';
}
}
void printAll(ostream& out)
{
for (size_t i = 2 * kNInputs; i < size; ++i)
{
auto& node = nodes[i];
auto ind = findIndex(g_target_masks, node.value);
if ((1 << ind) & g_targets)
{
out << 'm' << static_cast<char>('0' + ind) << " = ";
print(out, i);
out << '\n';
}
}
}
};
// Orders DAGs by score: the one with the lower score compares as smaller.
bool operator < (const DAG& l, const DAG& r)
{
    return r.score > l.score;
}
// Depth-first search for a DAG of operations that produces every requested
// target mask in fewer than g_score_limit operations. The caller seeds the
// first operations with addNode() and then runs process().
class Find
{
    // Per-pair counter table and per-(pair, op) flag table. Both are sized
    // for node indices relative to kNInputs, i.e. 0 .. kMaxSize-kNInputs-1.
    using SPA = array<uint8_t, (kMaxSize - kNInputs) * (kMaxSize - kNInputs)>;
    using EDA = bitset<(kMaxSize - kNInputs) * (kMaxSize - kNInputs) * 5>;
    SPA same_pair_;
    EDA dup_op_;
    DAG dag_;          // DAG currently being extended
    DAG best_;         // first complete solution found (score < limit)
    unsigned ops_;
    unsigned targets_; // bit i set while mask m<i> is still missing
    unsigned unused_;  // nodes that are neither a target nor an operand yet

    // Scope guard: marks a node as used as an operand while it is alive and
    // keeps unused_ in sync with the node's use counter.
    class UseCnt
    {
        unsigned& unused_;
        uint8_t& use_cnt_;
    public:
        UseCnt(unsigned& unused, uint8_t& use_cnt)
            : unused_(unused)
            , use_cnt_(use_cnt)
        {
            if (!use_cnt_)
                --unused_;
            ++use_cnt_;
        }
        ~UseCnt()
        {
            --use_cnt_;
            if (!use_cnt_)
                ++unused_;
        }
    };

    // Scope guard counting how many operations are currently applied to the
    // operand pair (l, r).
    class PairLim
    {
        uint8_t& counter_;
    public:
        // l and r are node indices >= kNInputs. They are rebased by kNInputs
        // because the table only has (kMaxSize - kNInputs) rows and columns;
        // raw indices would run past its end for the highest nodes.
        PairLim(SPA& spa, size_t l, size_t r)
            : counter_(spa[(kMaxSize - kNInputs) * (l - kNInputs) + (r - kNInputs)])
        {
            ++counter_;
        }
        bool exceeded()
        {
            return counter_ > 2;
        }
        ~PairLim()
        {
            --counter_;
        }
    };

    // Scope guard flagging that `op` is currently applied to the pair (l, r).
    // Its only use in tryNode() is commented out.
    class DupLim
    {
        EDA& eda_;
        size_t ind_;
    public:
        // Same rebasing by kNInputs as in PairLim.
        DupLim(EDA& eda, size_t l, size_t r, size_t op)
            : eda_(eda)
            , ind_(5 * ((kMaxSize - kNInputs) * (l - kNInputs) + (r - kNInputs)) + op - kNInputs)
        {
            eda_.flip(ind_);
        }
        bool used()
        {
            return !eda_.test(ind_);
        }
        ~DupLim()
        {
            eda_.flip(ind_);
        }
    };

    // Truth table of node l.
    unsigned getPos(uint8_t l)
    {
        return dag_.nodes[l].value;
    }

    // Appends op(l, r), recurses, then removes the node again.
    // Returns true when the new node completed the last missing target; the
    // solution is then stored in best_.
    bool tryNode(uint8_t l, uint8_t r, uint8_t op)
    {
        //DupLim dl(dup_op_, l, r, op);
        //if (dl.used())
        //    return false;
        addNode(l, r, op);
        // Must be a reference: the use counter of the node stored in the DAG
        // has to be raised, otherwise a target node that is later used as an
        // operand is subtracted from unused_ a second time.
        auto& node = dag_.nodes[dag_.size - 1];
        const auto ind = findIndex(g_target_masks, node.value);
        const auto m = (1 << ind) & targets_;
        if (m)
        {
            // a target counts as used even without consumers
            ++node.use_cnt;
            --unused_;
            if (targets_ == m)
            {
                best_ = dag_;
                --dag_.size;
                --dag_.score;
                return true;
            }
            targets_ &= ~m;
        }
        search();
        if (!m)
        {
            // undo the ++unused_ done by addNode()
            --unused_;
        }
        targets_ |= m;
        --dag_.size;
        --dag_.score;
        return false;
    }
public:
    // Starts with only the input literals; best_ is preset to the score limit
    // so that only cheaper DAGs are accepted.
    Find()
        : ops_(kNInputs)
        , targets_(g_targets)
        , unused_(0)
    {
        dag_.score = 0;
        dag_.size = kNInputs;
        best_.score = g_score_limit;
        best_.size = 0;
        for (int i = 0; i < kNInputs; ++i)
            dag_.nodes[i].setLit(static_cast<uint8_t>(i), g_literal_vals[i]);
        fill(begin(same_pair_), end(same_pair_), 0);
    }

    // Appends the node op(l, r) to the current DAG and counts it as one more
    // operation and one more unused node. The two AND-with-complement codes
    // are stored as OpAnd plus a complement flag on the respective operand.
    void addNode(const uint8_t l, const uint8_t r, uint8_t op)
    {
        auto& node = dag_.nodes[dag_.size];
        switch (op)
        {
        case OpAnd:
            node.value = getPos(l) & getPos(r);
            break;
        case OpOr:
            node.value = getPos(l) | getPos(r);
            break;
        case OpXor:
            node.value = getPos(l) ^ getPos(r);
            break;
        case OpNAndL:
            node.value = ~getPos(l) & getPos(r);
            break;
        case OpNAndR:
            node.value = getPos(l) & ~getPos(r);
            break;
        default:
            assert(false);
        }
        node.op = op;
        node.l_not = false;
        node.r_not = false;
        node.left = l;
        node.right = r;
        node.use_cnt = 0;
        if (op == OpNAndL)
        {
            node.l_not = true;
            node.op = OpAnd;
        }
        else if (op == OpNAndR)
        {
            node.r_not = true;
            node.op = OpAnd;
        }
        ++dag_.size;
        ++dag_.score;
        ++unused_;
    }

    // Tries every operation on every pair of non-literal nodes (i_l < i_r)
    // and recurses through tryNode(). Stops as soon as a solution is found.
    void search()
    {
        if (dag_.score >= best_.score)
            return;
        // NOTE(review): the loop starts at kNTargets, which has the same value
        // as kNInputs + 1 (the smallest i_r that leaves room for an i_l).
        for (uint8_t i_r = kNTargets; i_r < dag_.size; ++i_r)
        {
            UseCnt uc_r(unused_, dag_.nodes[i_r].use_cnt);
            // prune: too many unused nodes for the operations that remain
            if (unused_ > 2 * (best_.score - dag_.score) - 1)
                continue;
            for (uint8_t i_l = kNInputs; i_l < i_r; ++i_l)
            {
                UseCnt uc_l(unused_, dag_.nodes[i_l].use_cnt);
                if (unused_ > 2 * (best_.score - dag_.score) - 2)
                    continue;
                // skip two nodes built from the same left and right operands
                if (dag_.nodes[i_l].left == dag_.nodes[i_r].left &&
                    dag_.nodes[i_l].right == dag_.nodes[i_r].right
                    )
                    continue;
                PairLim pl(same_pair_, i_l, i_r);
                if (pl.exceeded())
                    continue;
                if (tryNode(i_l, i_r, OpAnd))
                    return;
                if (tryNode(i_l, i_r, OpOr))
                    return;
                if (tryNode(i_l, i_r, OpXor))
                    return;
                if (kNAnd)
                {
                    if (tryNode(i_l, i_r, OpNAndL))
                        return;
                    if (tryNode(i_l, i_r, OpNAndR))
                        return;
                }
            }
        }
    }

    // Prints the solution found, or "<name> impossible" when none beat the limit.
    void print(ostream& out, const char* name)
    {
        if (best_.score < g_score_limit)
        {
            out << name << " ops = " << best_.score << '\n';
            best_.printAll(out);
            out << '\n';
        }
        else
        {
            out << name << " impossible\n\n";
        }
    }

    // Runs the search, then prints the result while holding g_print_mutex.
    void process(ostream& out, const char* name)
    {
        search();
        lock_guard<mutex> lk(g_print_mutex);
        print(out, name);
    }
};
// Scans `str` for the digits '0'..'4', sets the matching bit of g_targets for
// each one and returns how many such digits were seen (repeats included).
// All other characters are ignored.
unsigned readTargets(char* str)
{
    unsigned seen = 0;
    for (const char* p = str; *p != '\0'; ++p)
    {
        const char ch = *p;
        if (ch < '0' || ch > '4')
            continue;
        g_targets |= 1 << (ch - '0');
        ++seen;
    }
    return seen;
}
// Prints the command-line help to stderr and terminates with exit status 1.
void usage()
{
    cerr << "Usage: bitcnt [0-4]*\n";
    cerr << "example: bitcnt 23 (to find targets m2,m3)\n";
    exit(1);
}
int main(int argc, char **argv) {
if (argc > 1)
g_score_limit = 6 + 2 * readTargets(argv[1]);
else
usage();
// set score_limit to 10 for m1,m2,m3 (with nand), time ≈ 1h
array<Find, 3> finders;
finders[0].addNode(0, 1, OpAnd);
finders[0].addNode(0, 1, OpOr);
finders[0].addNode(2, 3, OpAnd);
finders[0].addNode(2, 3, OpOr);
finders[1].addNode(0, 1, OpAnd);
finders[1].addNode(0, 1, OpXor);
finders[1].addNode(2, 3, OpAnd);
finders[1].addNode(2, 3, OpXor);
finders[2].addNode(0, 1, OpXor);
finders[2].addNode(0, 1, OpOr);
finders[2].addNode(2, 3, OpXor);
finders[2].addNode(2, 3, OpOr);
auto t0 = thread([&finders]{finders[0].process(cout, "&|");});
auto t1 = thread([&finders]{finders[1].process(cout, "&^");});
auto t2 = thread([&finders]{finders[2].process(cout, "^|");});
t0.join(); t1.join(); t2.join();
}
这个答案的第一个版本(见下)仍然是正确的,但已被上面较新的结果取代:
用 10 次操作计算 m2:
// x1, x2: each pair holds exactly one set bit
x1 = b1 ^ b2;
x2 = b3 ^ b4;
// either both pairs hold one set bit each, or one pair is fully set while
// the other pair is empty
m2 = (x1 & x2) | (((b1 & b2) ^ (b3 & b4)) & ~(x1 | x2));
用 8 次操作计算 m1:
// odd number of inputs set (1 or 3), minus the cases where some pair is fully set
m1 = (b1 ^ b2 ^ b3 ^ b4) & ~((b1 & b2) | (b3 & b4));