在 C++ 中更快地读取 CSV 文件

时间:2013-11-07 06:13:45

标签: c++ multithreading performance csv

我有一个包含上百万条记录的文件,需要对其进行一些计算。为此我写了功能相同的 Java 程序和 C++ 程序,但 Java 版本的执行速度比 C++ 版本快。我改用 C++ 的主要原因是想借助多线程让程序运行得更快。但是当我比较 Java 和 C++ 的单线程表现时,Java 只用一半的时间就完成了工作。

我需要解决这个问题。C++ 本应更快,但实际表现并不好。

如果能给我一些有用的提示就太好了,这样我可以去研究并尝试修复它。

谢谢

这是用逗号分隔数据生成对象的类

//Parser.cpp 
#include "Parser.h"
#include "PriceBar.h"
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <stdlib.h>


using namespace std;

vector<PriceBar> Parser :: parseFile(string file){

    string STRING;
    vector<PriceBar> bars;
    ifstream infile;
    infile.open (file.c_str());
    int a=0;
    string token;


while(getline(infile,STRING)) // To get you all the lines.
{
    vector<string> data;
    istringstream ss(STRING);
    while(getline(ss, token, ',')) {
                data.push_back(token);
            }
    //cout<<data[4]<<endl;

    if(!data[1].empty()){
                            //cout << "if is working" << endl;
                            double open = atof(data[1].c_str());
                            double high = atof(data[2].c_str());
                            double low = atof(data[3].c_str());
                            double close = atof(data[4].c_str());
                            bars.push_back(PriceBar(open, high, low, close));
                        }//end of if

}//end of while
infile.close();
//cout << "parser is done " << bars[2].getOpen() <<endl;
//cout << bars.size() << endl;
return bars;

}

PriceBar 类

/*
 * PriceBar.cpp
 *
 *  Created on: Nov 5, 2013
 *      Author: hansaka
 */

#include <iostream>
#include <string>
#include <vector>
#include "PriceBar.h"

using namespace std;

/**
 * Builds a price bar from its four price values.
 *
 * Each argument is stored in the member of the same name.
 *
 * @param open  Value stored in the open member.
 * @param high  Value stored in the high member.
 * @param low   Value stored in the low member.
 * @param close Value stored in the close member.
 */
PriceBar::PriceBar(double open, double high, double low, double close)
    : open(open), high(high), low(low), close(close) {
}

double PriceBar :: getOpen() {
    return open;
}
/**
 * Replaces the value held in the open member.
 * @param open New value to store.
 */
void PriceBar::setOpen(double open)
{
    this->open = open;
}
double PriceBar :: getHigh() {
    return high;
}
/**
 * Replaces the value held in the high member.
 * @param high New value to store.
 */
void PriceBar::setHigh(double high)
{
    this->high = high;
}
double PriceBar :: getLow() {
    return low;
}
/**
 * Replaces the value held in the low member.
 * @param low New value to store.
 */
void PriceBar::setLow(double low)
{
    this->low = low;
}
double PriceBar :: getClose() {
    return close;
}
/**
 * Replaces the value held in the close member.
 * @param close New value to store.
 */
void PriceBar::setClose(double close)
{
    this->close = close;
}

主文件

#include <iostream>
#include <vector>
#include <string>
#include "PriceBar.h"
#include "Parser.h"
#include <ctime>

using namespace std;

int main() {
Parser p;

//getting the counter ready
time_t tstart, tend;

//Starting the time
tstart = time(0);

vector<string> path;
path.push_back("file.csv");

for( vector<string>::const_iterator it = path.begin(); it != path.end(); ++it ){
  //    cout << *it << endl;
    vector<PriceBar> priceBars = p.parseFile(*it);
    //priceBars = p.parseFile(*it);

//      cout << "done" << endl;

    double maxHigh = 0.0;
    double maxLow = 0.0;
    double maxOpen = 0.0;
    double maxClose = 0.0;
    double maxVolume = 0.0;
    double current = 0.0;

  //     cout << "hippy " << priceBars[2].getOpen() <<endl;
   int size = priceBars.size();
 //      cout << "size = " << size << endl;


    for (int j=0;j<size;j++) {
        current = priceBars[j].getOpen();
        if (current > maxOpen) {
            maxOpen = current;
        }
    }//end of pricebar for

    current = 0.0;
    for (int j=0;j<size;j++) {
        current = priceBars[j].getOpen();
        if (current > maxHigh) {
            maxHigh = current;
        }
    }
    current = 0.0;
    for (int j=0;j<size;j++) {
        current = priceBars[j].getOpen();
        if (current > maxLow) {
            maxLow = current;
        }
    }
    current = 0.0;
    for (int j=0;j<size;j++) {
        current = priceBars[j].getOpen();
        if (current > maxClose) {
            maxClose = current;
        }
    }

                cout << "MaxHigh =" << maxOpen << " MaxLow = " << maxHigh
                        << " MaxHigh =" << maxLow << " MaxLow = " << maxClose << endl;


}//end of it for
cout << "DONE" << endl;

//Ending the time count
tend = time(0);

cout << " It took " << difftime(tend, tstart) << " second(s).";

return 0;
}

我一直在修改这段代码,所以注释不多,而且有些代码只是被我注释掉留作参考,对此我表示歉意。

1 个答案:

答案 0 :(得分:2)

我会做一些事情:

  1. 把读取文件时用到的对象的构造移到循环之外,并在合适的位置清空它们。构造流的开销并不小,而且每次都为向量和字符串重新分配内存而不是复用,效率很低。
  2. 我不会真的去存储 std::vector<std::string>,只存储 std::vector<double> 就足够了,读入时直接转换各个值即可。
  3. 当前代码没有检查每一行是否真的读到了足够多的字段,遇到格式错误的文件时可能导致越界访问。如果文件包含 5 列以上,可以考虑不去解码多余的尾随列。
  4. 在 main() 中我会调用 std::ios_base::sync_with_stdio(false);。我预计它不会带来太大的差别(它确实能提高标准流对象的性能,不过我可以想象,在实现得很差的 IOStream 中它也会影响文件流)。
  5. 由于 IOStream 通常在头文件中实现,因此至少要对包含 I/O 的翻译单元启用编译优化,这一点非常重要。
  6. 以下是我编写函数的方法:

    /**
     * Parses a comma-separated price file into a list of PriceBar objects.
     *
     * The first field of every line is read and discarded. Each remaining
     * field is converted with std::atof(); an empty field counts as 0.0.
     * A line yields a PriceBar only when exactly four values follow the
     * first field.
     *
     * @param file Path of the CSV file to read.
     * @return One PriceBar per accepted line, in file order.
     */
    std::vector<PriceBar> Parser::parseFile(std::string const& file) {
        std::vector<PriceBar> bars;
        std::ifstream         infile(file.c_str());
        // Stream and column buffer are built once and reset per line.
        std::istringstream    lin;
        std::vector<double>   columns;
    
        for (std::string line, topic, value; std::getline(infile, line); ) {
            lin.clear();      // drop the eof/fail state left by the previous line
            lin.str(line);
            columns.clear();
            // The init clause consumes the leading field; the condition then
            // pulls the remaining fields from the same stream. The third
            // clause is intentionally empty.
            for (std::getline(lin, topic, ','); std::getline(lin, value, ','); ) {
                columns.push_back(value.empty()? 0.0: std::atof(value.c_str()));
            }
            if (columns.size() == 4) {
                bars.push_back(PriceBar(columns[0], columns[1], columns[2], columns[3]));
            }
        }
        return bars;
    }
    

    我不认为使用多个线程会有所帮助。读取一个只有一百万行左右的小文件,不值得为此引入相应的复杂性。