我需要解析大量C风格的字符串(约50万个),每个字符串包含4个由单个空格字符分隔的浮点数。以下是其中一个字符串的示例:
“90292 5879 89042.2576 5879”
我需要将这些数字存储到表示两个点的两个结构体中。考虑到解析时允许修改字符串,并且99.99%的情况下这些数字都只是无符号整数,最快的方法是什么?
以下是我目前的实现:
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
using namespace chrono;
/// A 2-D point with single-precision coordinates.
struct PointF
{
    float x;
    float y;
};

/// Parses four whitespace-separated numbers from `points` into two points.
///
/// The fields are read in the order p1.x, p1.y, p2.x, p2.y using a single
/// left-to-right pass with std::strtof, so the string is scanned only once.
/// The buffer is left untouched (the parameter stays `char*` only to keep the
/// signature callers already use).
///
/// @param points  NUL-terminated text such as "90292 5879 89042.2576 5879".
/// @param p1      Receives the first and second number.
/// @param p2      Receives the third and fourth number.
///
/// In debug builds an assertion fires when a field is missing or when a fifth
/// space-separated field follows. In builds with NDEBUG a missing field is
/// stored as 0 and parsing never moves past the terminating NUL.
void parse_points(char* points, PointF& p1, PointF& p2)
{
    float* const targets[] = {&p1.x, &p1.y, &p2.x, &p2.y};

    const char* cursor = points;
    for (float* const target : targets)
    {
        char* stop = nullptr;
        // strtof skips the separating space itself and reports where the
        // number ended; when nothing can be parsed it returns 0 and leaves
        // `stop` equal to `cursor`.
        *target = std::strtof(cursor, &stop);
        assert(stop != cursor);  // a number must have been consumed
        cursor = stop;
    }

    // Same contract as before: no further separator after the fourth field.
    assert(std::strchr(cursor, ' ') == nullptr);
}
int main()
{
const auto n = 500000;
char points_str[] = "90292 5879 89042.2576 5879";
PointF p1, p2;
vector<string> data(n);
for (auto& s : data)
s.assign(points_str);
const auto t0 = system_clock::now();
for (auto i = 0; i < n; i++)
parse_points(const_cast<char*>(data[i].c_str()), p1, p2);
const auto t1 = system_clock::now();
const auto elapsed = duration_cast<milliseconds>(t1 - t0).count();
cout << "Elapsed: " << elapsed << " ms" << endl;
cin.get();
return 0;
}
答案 0 :(得分:0)
我发现这段代码存在多个问题(不过你能提出这个问题其实很好):
我建议采用如下写法(请注意,此处的 std::experimental::optional<> 等同于 boost::optional<>):
#include <iostream>
#include <cstring>
#include <utility>
#include <experimental/optional>
// A 2-D point with single-precision coordinates.
struct PointF
{
    float x;
    float y;
};

// Splits `pch` on spaces with strtok() and converts the first four tokens
// with atof(). Returns the two points built from them, or nullopt when fewer
// than four tokens are present. Tokens after the fourth are ignored.
// strtok() overwrites separators in `pch` with NULs, so the buffer must be
// writable.
std::experimental::optional<std::pair<PointF, PointF>> parse_points(char* pch)
{
    float coords[4];

    char* source = pch;
    for (float& coord : coords)
    {
        char* const token = strtok(source, " ");
        if (token == nullptr)
            return std::experimental::nullopt;

        coord = static_cast<float>(atof(token));
        // Passing a null pointer makes strtok continue in the same buffer.
        source = nullptr;
    }

    const PointF first{coords[0], coords[1]};
    const PointF second{coords[2], coords[3]};
    return std::make_pair(first, second);
}
// Demo: parses one sample line and prints the four coordinates.
int main() {
    // parse_points() tokenises in place, so hand it a writable array rather
    // than a string literal. A stack array also removes the new[]/delete
    // mismatch the heap copy had (memory from new[] must go to delete[]).
    char buffer[] = "90292 5879 89042.2576 5879";

    const auto pOpt = parse_points(buffer);
    if (pOpt)
        std::cout << pOpt->first.x << " " << pOpt->first.y << " "
                  << pOpt->second.x << " " << pOpt->second.y << " " << std::endl;
}
答案 1 :(得分:-1)
您可以自己实现一个 atof,让它在解析完数字后返回空格(space)所在的位置。这样,每个字符串只需要遍历一次。
例如
// Parses one decimal number starting at `point` and stores it in `num`.
//
// Accepted form: an optional '+' or '-', then digits with at most one '.'.
// Exponents, "inf"/"nan", leading whitespace and locale-specific separators
// are not recognised, and overflow is not checked.
//
// Returns a pointer to the first character that is not part of the number.
// For well-formed input this is the separating space or the terminating NUL,
// so the caller can continue with the next field without rescanning. Any
// other character at the returned position means the field was malformed.
//
// Despite the name, this is not the C library atof: it takes a mutable
// pointer plus an output reference.
char *atof(char *point, float &num) {
    bool neg = false;
    if (*point == '-') {
        neg = true;
        ++point;
    } else if (*point == '+') {
        ++point;
    }

    // Collect all digits as one integer in double precision and remember the
    // power of ten contributed by the fractional part. Small powers of ten
    // are exact in double, so the single division below rounds only once,
    // unlike accumulating digit * 0.1^k in float.
    double value = 0.0;
    double scale = 1.0;
    bool dot = false;
    for (;; ++point) {
        const char c = *point;
        if (c >= '0' && c <= '9') {
            value = value * 10.0 + (c - '0');
            if (dot) {
                scale *= 10.0;
            }
        } else if (c == '.' && !dot) {
            dot = true;
        } else {
            break;  // space, NUL, or an invalid character ends the number
        }
    }

    value /= scale;
    num = static_cast<float>(neg ? -value : value);
    return point;
}