我需要从字节数组(std::vector<uint8_t>)中提取若干位到 std::bitset 中。要提取的内容可能跨越两个字节。
这是我的单元测试:
// Unit-test fragment: the three bytes are read as one MSB-first bit stream and
// sliced at bit offsets 0, 4 and 12. Each slice is checked both as a bit string
// and as an integer value.
std::vector<uint8_t> val = { 0xAB, 0xCD, 0xEF }; // is 101010111100110111101111
std::bitset<4> a = extractToBitSet<4>( val, 0 ); // should be 0x0A: 1010
std::bitset<8> bc = extractToBitSet<8>( val, 4 ); // should be 0xBC: 10111100
std::bitset<12> def = extractToBitSet<12>( val, 12 ); // should be 0x0DEF: 110111101111
// Textual form: to_string() prints the most significant bit first.
CPPUNIT_ASSERT( a.to_string() == "1010" );
CPPUNIT_ASSERT( bc.to_string() == "10111100" );
CPPUNIT_ASSERT( def.to_string() == "110111101111" );
// Numeric form of the same slices.
unsigned long aVal = a.to_ulong();
unsigned long bcVal = bc.to_ulong();
unsigned long defVal = def.to_ulong();
CPPUNIT_ASSERT( aVal == 0x0A );
CPPUNIT_ASSERT( bcVal == 0xBC );
CPPUNIT_ASSERT( defVal == 0x0DEF );
我提出了这个解决方案:
/// Reverses the bit order of `bitset` in place: bit 0 trades places with bit
/// Count-1, bit 1 with bit Count-2, and so on.
///
/// @tparam Count  number of bits in the bitset (spelled without a leading
///                underscore: names of the form _Uppercase are reserved for
///                the implementation)
/// @param bitset  the bitset to reverse; modified in place
template<size_t Count> void reverseBitSet( std::bitset<Count>& bitset )
{
    // Only the lower half is walked; for an odd Count the middle bit stays put.
    for ( size_t pos = 0; pos < Count/2; ++pos )
    {
        const size_t mirror = Count - pos - 1;
        const bool val = bitset[pos];
        bitset[pos] = bitset[mirror];
        bitset[mirror] = val;
    }
}
/// Extracts Count consecutive bits from `data`, starting at bit `offset`.
///
/// Bits are numbered MSB-first across the byte sequence (bit 0 is the most
/// significant bit of data[0]); the first extracted bit becomes the most
/// significant bit of the result.
///
/// @tparam Count  number of bits to extract (spelled without a leading
///                underscore: names of the form _Uppercase are reserved)
/// @param data    source bytes; accessed with operator[], so no bounds check
///                is performed and it must hold at least offset + Count bits
/// @param offset  index of the first bit to extract
/// @return        the extracted bits
template<size_t Count> std::bitset<Count> extractToBitSet( const std::vector<uint8_t>& data, size_t offset )
{
    std::bitset<Count> res;
    for ( size_t pos = 0; pos < Count; ++pos )
    {
        const size_t bitIndex = offset + pos;
        // std::bitset indexes from the least significant bit, so the byte is
        // reversed to make index k mean "k-th bit counted from the MSB".
        std::bitset<8> tempBitSet( data[ bitIndex/8 ] );
        reverseBitSet( tempBitSet );
        res[pos] = tempBitSet[ bitIndex%8 ];
    }
    // res was filled with the first bit at index 0; flip it so that the first
    // bit ends up as the most significant one.
    reverseBitSet( res );
    return res;
}
它可以正常工作(我的测试全部通过),但是由于要创建大量临时 bitset 并执行许多次反转操作,看起来效率非常低。
有更优雅的方法吗?
答案 0 :(得分:6)
为什么要做这么多次反转?您完全可以按照实际需要,直接显式地设置每一位。只需要一个循环:
/// Copies N bits out of the byte stream `vs`, one bit per iteration.
///
/// The stream is read MSB-first starting at bit `offset`; the first bit read
/// becomes the most significant bit of the result. `vs` is indexed with
/// operator[], so no bounds check is performed.
template <size_t N>
std::bitset<N> extract(std::vector<uint8_t> const& vs, size_t offset) {
    std::bitset<N> out;
    size_t src = offset;  // absolute index of the stream bit being read
    // dst walks the result from its most significant bit down to bit 0.
    for (size_t dst = N; dst-- > 0; ++src) {
        const uint8_t mask = static_cast<uint8_t>(0x80u >> (src % 8));
        out.set(dst, (vs[src / 8] & mask) != 0);
    }
    return out;
}
答案 1 :(得分:3)
这是一个按字节处理的版本。区别在于:对于中间完整的 8 位块,数据每次以整个字节移入累加器;而首尾两端则作为不完整的字节,通过移位/掩码操作处理。
我强烈认为这样会更高效:
#include <bitset>
#include <cstdint>
#include <iostream>
#include <cassert>
#include <vector>
/// Extracts N bits from the byte stream `vs`, consuming up to a whole byte
/// per iteration.
///
/// The stream is read MSB-first starting at bit `offset`; the first bit read
/// becomes the most significant bit of the result.
///
/// @tparam N      number of bits to extract
/// @param vs      source bytes; read through a raw pointer, so no bounds check
///                is performed and it must hold at least offset + N bits
/// @param offset  index of the first bit to extract
/// @return        the extracted bits
template <size_t N>
std::bitset<N> extract(std::vector<uint8_t> const& vs, size_t offset)
{
    std::bitset<N> accumulator;

    const std::uint8_t* p = vs.data() + (offset / 8);
    std::size_t skip = offset % 8;  // leading bits of *p that precede the range

    for (std::size_t bits_to_go = N; bits_to_go != 0; ++p)
    {
        // A leading partial byte, a full middle byte and a trailing partial
        // byte are all the same operation: take `chunk` bits that start
        // `skip` bits below the top of the byte.
        const std::size_t available = 8 - skip;
        const std::size_t chunk = bits_to_go < available ? bits_to_go : available;

        // Shift out the bits to the right of the chunk, then mask away the
        // skipped bits to its left.
        const unsigned value =
            (static_cast<unsigned>(*p) >> (available - chunk)) & ((1u << chunk) - 1u);

        accumulator <<= chunk;
        accumulator |= std::bitset<N>(value);

        bits_to_go -= chunk;
        skip = 0;  // only the first byte can start mid-byte
    }
    return accumulator;
}
// Smoke test: slices the three-byte stream 0xAB 0xCD 0xEF at bit offsets 0, 4
// and 12, checks the textual and numeric form of every slice, then prints them.
int main()
{
    const std::vector<uint8_t> val{ 0xAB, 0xCD, 0xEF }; // bit stream 101010111100110111101111

    const std::bitset<4> a = extract<4>( val, 0 );       // expect 0x0A: 1010
    const std::bitset<8> bc = extract<8>( val, 4 );      // expect 0xBC: 10111100
    const std::bitset<12> def = extract<12>( val, 12 );  // expect 0x0DEF: 110111101111

    // Textual form (most significant bit first).
    assert( a.to_string() == "1010" );
    assert( bc.to_string() == "10111100" );
    assert( def.to_string() == "110111101111" );

    // Numeric form.
    const unsigned long aVal = a.to_ulong();
    const unsigned long bcVal = bc.to_ulong();
    const unsigned long defVal = def.to_ulong();
    assert( aVal == 0x0A );
    assert( bcVal == 0xBC );
    assert( defVal == 0x0DEF );

    std::cout << a << std::endl;
    std::cout << bc << std::endl;
    std::cout << def << std::endl;
}
下面是一个测试程序——见真章的时刻到了……
#include <algorithm>
#include <bitset>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <random>
#include <string>
#include <utility>
#include <vector>
/// Copies N bits out of the byte stream `vs`, one bit per iteration.
///
/// The stream is read MSB-first starting at bit `offset`; the first bit read
/// becomes the most significant bit of the result. `vs` is indexed with
/// operator[], so no bounds check is performed.
template <size_t N>
std::bitset<N> extract_bitwise(std::vector<uint8_t> const& vs, size_t offset) {
    std::bitset<N> out;
    size_t src = offset;  // absolute index of the stream bit being read
    // dst walks the result from its most significant bit down to bit 0.
    for (size_t dst = N; dst-- > 0; ++src) {
        const uint8_t mask = static_cast<uint8_t>(0x80u >> (src % 8));
        out.set(dst, (vs[src / 8] & mask) != 0);
    }
    return out;
}
/// Extracts N bits from the byte stream `vs`, consuming up to a whole byte
/// per iteration.
///
/// The stream is read MSB-first starting at bit `offset`; the first bit read
/// becomes the most significant bit of the result.
///
/// @tparam N      number of bits to extract
/// @param vs      source bytes; read through a raw pointer, so no bounds check
///                is performed and it must hold at least offset + N bits
/// @param offset  index of the first bit to extract
/// @return        the extracted bits
template <size_t N>
std::bitset<N> extract(std::vector<uint8_t> const& vs, size_t offset)
{
    std::bitset<N> accumulator;

    const std::uint8_t* p = vs.data() + (offset / 8);
    std::size_t skip = offset % 8;  // leading bits of *p that precede the range

    for (std::size_t bits_to_go = N; bits_to_go != 0; ++p)
    {
        // A leading partial byte, a full middle byte and a trailing partial
        // byte are all the same operation: take `chunk` bits that start
        // `skip` bits below the top of the byte.
        const std::size_t available = 8 - skip;
        const std::size_t chunk = bits_to_go < available ? bits_to_go : available;

        // Shift out the bits to the right of the chunk, then mask away the
        // skipped bits to its left.
        const unsigned value =
            (static_cast<unsigned>(*p) >> (available - chunk)) & ((1u << chunk) - 1u);

        accumulator <<= chunk;
        accumulator |= std::bitset<N>(value);

        bits_to_go -= chunk;
        skip = 0;  // only the first byte can start mid-byte
    }
    return accumulator;
}
/// Runs `func` once and prints "<ident> : <elapsed>ms" to std::cout, where the
/// elapsed time is reported in fractional milliseconds.
auto time_it = [](auto&& func, auto&& ident)
{
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by wall-clock adjustments that happen while func() runs.
    using clock = std::chrono::steady_clock;
    using fms = std::chrono::duration<double, std::chrono::milliseconds::period>;

    const auto start = clock::now();
    func();
    const auto stop = clock::now();

    // Conversion to a floating-point duration is implicit and lossless enough
    // for reporting; no duration_cast is needed.
    std::cout << ident << " : " << fms(stop - start).count() << "ms\n";
};
// Shared benchmark configuration and container aliases.

// Number of elements the benchmark containers are sized for.
constexpr std::size_t test_size = 1'000'000;

// One input sample: a flat sequence of bytes.
using byte_array = std::vector<std::uint8_t>;

// A collection of input samples.
using byte_arrays = std::vector<byte_array>;

// A collection of N-bit extraction results.
template<std::size_t N>
using bitset_array = std::vector<std::bitset<N>>;
/// Builds the benchmark input: test_size arrays of 64 random bytes each.
///
/// @return the generated arrays; contents differ on every run because the
///         engine is seeded from std::random_device
byte_arrays generate_test_data()
{
    byte_arrays result;
    result.reserve(test_size);  // one allocation instead of repeated regrowth

    std::random_device rd;
    std::default_random_engine eng(rd());
    // uniform_int_distribution is only specified for short/int/long/long long
    // and their unsigned variants; instantiating it with std::uint8_t is not
    // allowed, so draw an unsigned int in [0, 255] and narrow it explicitly.
    std::uniform_int_distribution<unsigned int> dist(0, 255);

    for (std::size_t i = 0 ; i < test_size ; ++i)
    {
        // Construct the array in place rather than copying a local into the vector.
        result.emplace_back(64);
        byte_array& ba = result.back();
        std::generate(ba.begin(), ba.end(),
                      [&] { return static_cast<std::uint8_t>(dist(eng)); });
    }
    return result;
}
/// Reverses the bit order of `bitset` in place: bit 0 trades places with bit
/// Count-1, bit 1 with bit Count-2, and so on.
///
/// @tparam Count  number of bits in the bitset (spelled without a leading
///                underscore: names of the form _Uppercase are reserved for
///                the implementation)
/// @param bitset  the bitset to reverse; modified in place
template<size_t Count> void reverseBitSet( std::bitset<Count>& bitset )
{
    // Only the lower half is walked; for an odd Count the middle bit stays put.
    for ( size_t pos = 0; pos < Count/2; ++pos )
    {
        const size_t mirror = Count - pos - 1;
        const bool val = bitset[pos];
        bitset[pos] = bitset[mirror];
        bitset[mirror] = val;
    }
}
/// Extracts Count consecutive bits from `data`, starting at bit `offset`.
///
/// Bits are numbered MSB-first across the byte sequence (bit 0 is the most
/// significant bit of data[0]); the first extracted bit becomes the most
/// significant bit of the result.
///
/// @tparam Count  number of bits to extract (spelled without a leading
///                underscore: names of the form _Uppercase are reserved)
/// @param data    source bytes; accessed with operator[], so no bounds check
///                is performed and it must hold at least offset + Count bits
/// @param offset  index of the first bit to extract
/// @return        the extracted bits
template<size_t Count> std::bitset<Count> extractToBitSet( const std::vector<uint8_t>& data, size_t offset )
{
    std::bitset<Count> res;
    for ( size_t pos = 0; pos < Count; ++pos )
    {
        const size_t bitIndex = offset + pos;
        // std::bitset indexes from the least significant bit, so the byte is
        // reversed to make index k mean "k-th bit counted from the MSB".
        std::bitset<8> tempBitSet( data[ bitIndex/8 ] );
        reverseBitSet( tempBitSet );
        res[pos] = tempBitSet[ bitIndex%8 ];
    }
    // res was filled with the first bit at index 0; flip it so that the first
    // bit ends up as the most significant one.
    reverseBitSet( res );
    return res;
}
template<std::size_t N>
void run_tests_for_bits(const byte_arrays& test_data)
{
bitset_array<N> bytewise(test_size), bitwise(test_size), original(test_size);
auto make_ident = [](auto&& preamble) {
return std::string(std::move(preamble)) + ' ' + std::to_string(N) + " bits";
};
std::cout << make_ident("running tests for") << '\n';
time_it([&]
{
std::transform(test_data.begin(),
test_data.end(),
bytewise.begin(),
[](auto& vec)
{
return extract<N> (vec, 12);
});
}, make_ident("bytewise"));
time_it([&]
{
std::transform(test_data.begin(),
test_data.end(),
bitwise.begin(),
[](auto& vec)
{
return extract_bitwise<N> (vec, 12);
});
}, make_ident("bitwise"));
time_it([&]
{
std::transform(test_data.begin(),
test_data.end(),
original.begin(),
[](auto& vec)
{
return extractToBitSet<N> (vec, 12);
});
}, make_ident("original"));
std::cout << "checking results\n";
auto mm1 = std::mismatch(bytewise.begin(), bytewise.end(), bitwise.begin());
auto mm2 = std::mismatch(bytewise.begin(), bytewise.end(), original.begin());
if (mm1.first != bytewise.end())
std::cout << "mismatch between bytewise and bitwise at index " << std::distance(bytewise.begin(), mm1.first) << '\n';
else if (mm2.first != bytewise.end())
std::cout << "mismatch between bytewise and original at index " << std::distance(bytewise.begin(), mm1.first) << '\n';
else
std::cout << "all good\n";
}
int main()
{
std::cout << "building test data\n";
auto test_data = generate_test_data();
run_tests_for_bits<12>(test_data);
run_tests_for_bits<24>(test_data);
run_tests_for_bits<52>(test_data);
}
示例结果:
building test data
running tests for 12 bits
bytewise 12 bits : 117.462ms
bitwise 12 bits : 316.362ms
original 12 bits : 2836.99ms
checking results
all good
running tests for 24 bits
bytewise 24 bits : 274.765ms
bitwise 24 bits : 624.927ms
original 24 bits : 5619.05ms
checking results
all good
running tests for 52 bits
bytewise 52 bits : 519.265ms
bitwise 52 bits : 1260.75ms
original 52 bits : 11693.6ms
checking results
all good
通过一些编辑(测试数据数组大小),我们可以将测试增加到更多位:
(我已经去掉了“反转”方法,因为在位长度较大时,它的运行时间会变得非常长。)
running tests for 256 bits
bytewise 256 bits : 3912.57ms
bitwise 256 bits : 6367.22ms
running tests for 512 bits
bytewise 512 bits : 10610.2ms
bitwise 512 bits : 12448.7ms
我们可以看到,按位方法和按字节方法的耗时最终趋于接近,并在 512 位之后发生交叉(在我的机器上)。
running tests for 1024 bits
bytewise 1024 bits : 30070.6ms
bitwise 1024 bits : 25236.2ms
在此之后,按位方法变得更高效。
running tests for 2048 bits
bytewise 2048 bits : 95648.4ms
bitwise 2048 bits : 47672.1ms