在对我提出的要关闭映射文件(c++ close a open() file read with mmap)的问题提出建议后,我进行了一些比较,并注意到,正如某些用户所建议的那样,std :: cin缓冲区方法的性能类似于我的映射方法。
我决定进行性能比较:每个脚本都会打开一个包含其他文件路径的文件(大约3500个),读取该文件并获取10个随机路径,然后打开所有这10个文件(每行500行,每个大约700个字符)行),并随机读取其中的换行数1000次。
我的原始版本是一种映射方法(但是最后没有关闭文件,并且在打开300-400个文件后出现错误,在这种情况下没有实现(c++ close a open() file read with mmap)。
MMAP.OPEN(v0):
#include <algorithm>
#include <iostream>
#include <cstring>
#include <vector>
#include <set>
#include <typeinfo>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <fstream>
#include <sstream>
#include <unistd.h>
const char* map_file(const char* fname, size_t& length);
void gnomadIndex( std::string &nomeFileIndex,
std::vector<std::string> &vSubfileNames,
std::vector<int> &vSubfileStarts,
std::vector<int> &vSubfileStops,
std::vector<std::vector<int>> &vSubfilePosizioniVector);
void gnomadSubfileAnalysis( std::string &nomeFile );
void populateVector( std::vector<int> &vec, int n );
int generateRandInt( int l0, int u0 );
int main() {
//this take the INDEX file (with the paths to the athor) and populate these vectors
std::string gnomadSplitDir = "/Volumes/enrico/gnomad/exomes/splitted_5c/";
std::string nomeChr = "1";
std::stringstream streamNomeGnomadChrDir;
streamNomeGnomadChrDir << gnomadSplitDir << "ex_" << nomeChr << "_5c/" << "chr_" << nomeChr << "/";
std::string pathGnomadChrDir = streamNomeGnomadChrDir.str();
std::stringstream streamNomeGnomadChrIndex;
streamNomeGnomadChrIndex << pathGnomadChrDir << "chr_" << nomeChr << ".txt";
std::string pathGnomadChrIndex = streamNomeGnomadChrIndex.str();
std::vector<std::string> vSubfileNames;
std::vector<int> vSubfileStarts;
std::vector<int> vSubfileStops;
std::vector<std::vector<int>> vSubfilePosizioniVector;
gnomadIndex(pathGnomadChrIndex,
vSubfileNames,
vSubfileStarts,
vSubfileStops,
vSubfilePosizioniVector
);
std::vector<std::string> vGnomadSubfilePaths;
srand((unsigned)time(NULL)); //seeds the pseudo random number generator that rand() uses (http://www.cplusplus.com/forum/beginner/29699/)
int size0 = 10;
std::vector<int> v0;
populateVector(v0, size0);
//the vector with the file names is converted in file paths and then opened and line counted for each file
std::vector<std::string> vSubfileNames2;
for (auto si : v0) vSubfileNames2.push_back(vSubfileNames[si]);
for ( int subCount = 0; subCount < vSubfileNames2.size(); subCount++ ) {
std::stringstream streamNomeSubfileGnomad;
streamNomeSubfileGnomad << pathGnomadChrDir << vSubfileNames2[subCount];
std::string pathGnomadSubfile = streamNomeSubfileGnomad.str();
gnomadSubfileAnalysis(pathGnomadSubfile);
}
}
void gnomadSubfileAnalysis( std::string &nomeFile ) {
size_t length;
auto f = map_file(nomeFile.c_str(), length);
auto l = f + length;
std::vector<int> v0;
for (int i=0; i<length; i++) if (f[i] == '\n') v0.push_back(i);
std::cout << "subfile: " << nomeFile << ", has: " << v0.size() << " rows in: " << length << " bytes." << '\n';
}
void gnomadIndex( std::string &nomeFileIndex,
std::vector<std::string> &vSubfileNames,
std::vector<int> &vSubfileStarts,
std::vector<int> &vSubfileStops,
std::vector<std::vector<int>> &vSubfilePosizioniVector
) {
size_t length;
auto f = map_file(nomeFileIndex.c_str(), length);
auto l = f + length;
std::vector<int> v0;
v0.push_back(0);
for (int i=0; i<length; i++) {
if (f[i] == '\n') v0.push_back(i+1);
}
v0.pop_back();
for (int nl = 0; nl < v0.size(); nl++) {
int ncol = 0;
std::vector<char> vSubfile;
std::vector<char> vSubStart;
std::vector<char> vSubStop;
std::vector<char> vSubPos;
std::vector<int> vSubfilePosVector;
for( int start=v0[nl]; start<v0[nl+1]; start++ ) {
if (f[start] == ',') ncol++;
if (ncol == 0) {
if ( f[start] != ',' ) {
vSubfile.push_back(f[start]);
}
}
if (ncol == 1) {
if ( f[start] != ',' ) {
vSubStart.push_back(f[start]);
}
}
if (ncol == 2) {
if ( f[start] != ',' ) {
vSubStop.push_back(f[start]);
}
}
if (ncol == 3) {
if ( f[start] != ',' ) {
if ( f[start] != ':' ) {
vSubPos.push_back(f[start]);
}
if ( f[start] == ':' || f[start] == '\n' ) {
std::string subfilePosValue ( vSubPos.begin(), vSubPos.end() );
vSubPos.clear();
vSubfilePosVector.push_back( stoi(subfilePosValue) );
subfilePosValue.erase();
}
}
}
}
std::string subFileValue ( vSubfile.begin(), vSubfile.end() );
vSubfile.clear();
std::string subfileStartValue ( vSubStart.begin(), vSubStart.end() );
vSubStart.clear();
std::string subfileStopValue ( vSubStop.begin(), vSubStop.end() );
vSubStop.clear();
vSubfileNames.push_back( subFileValue );
vSubfileStarts.push_back( stoi( subfileStartValue ) );
vSubfileStops.push_back( stoi(subfileStopValue) );
vSubfilePosizioniVector.push_back( vSubfilePosVector );
}
}
void handle_error(const char* msg) {
perror(msg);
exit(255);
}
const char* map_file(const char* fname, size_t& length) {
int fd = open(fname, O_RDONLY);
if (fd == -1)
handle_error("open");
struct stat sb;
if (fstat(fd, &sb) == -1)
handle_error("fstat");
length = sb.st_size;
const char* addr = static_cast<const char*>(mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0u));
if (addr == MAP_FAILED)
handle_error("mmap");
return addr;
}
void populateVector( std::vector<int> &vec, int n ) {
for( int i = 0 ; i < n ; ++i ) vec.push_back( 0 + rand() % (3000 - 1 + 1)); //gli ultimi due num sono upper e lower range limits
sort( vec.begin(), vec.end() );
}
int generateRandInt( int l0, int u0 ) {
int n0 = l0 + rand() % (u0 - l0 + 1);
return n0;
}
根据建议,然后我尝试使用 CIN(v1),此处为代码:
#include <iostream>
#include <fstream>
#include <string>
#include <cstring>
#include <vector>
#include <sstream>
void read_file_cin( std::string &nomeFile );
void read_gnomad_index_cin( std::string &nomeFile, char &separator, std::vector<std::string> &vectorName );
void populateVector( std::vector<int> &vec, int n );
int generateRandInt( int l0, int u0 );
std::vector<int> sovrapposizioniVectorsInt( std::vector<int> &v0, std::vector<int> &v1 );
int main() {
std::string pathGnomadChrIndex = "/Volumes/enrico/gnomad/exomes/splitted_5c/ex_1_5c/chr_1/chr_1.txt";
char separator = ',';
std::vector<std::string> vSubfileNames;
read_gnomad_index_cin(pathGnomadChrIndex, separator, vSubfileNames);
srand((unsigned)time(NULL)); //seeds the pseudo random number generator that rand() uses (http://www.cplusplus.com/forum/beginner/29699/)
int size0 = 10;
std::vector<int> v0;
populateVector(v0, size0);
std::vector<std::string> vSubfileNames2;
for (auto si : v0) vSubfileNames2.push_back(vSubfileNames[si]);
std::string gnomadSplitDir = "/Volumes/enrico/gnomad/exomes/splitted_5c/ex_1_5c/chr_1/";
for ( auto vi : vSubfileNames2 ) {
std::cout << vi << '\n';
std::stringstream streamNomeGnomadChrDir;
streamNomeGnomadChrDir << gnomadSplitDir << vi;
std::string pathSubfile = streamNomeGnomadChrDir.str();
read_file_cin( pathSubfile );
}
}
void first_column_cin( char &separator, std::vector<std::string> &vectorName) {
std::string line;
int lineCount = 0;
while(std::getline(std::cin, line)) {
std::string filename;
std::istringstream iss(line);
getline(iss, filename, separator);
vectorName.push_back(filename);
lineCount++;
}
}
void read_gnomad_index_cin( std::string &nomeFile, char &separator, std::vector<std::string> &vectorName ) {
std::ifstream in(nomeFile.c_str());
std::streambuf *cinbuf = std::cin.rdbuf();
std::cin.rdbuf(in.rdbuf());
first_column_cin( separator, vectorName );
std::cin.rdbuf(cinbuf);
}
void file_countline_cin() {
std::string line;
int lineCount = 0;
while(std::getline(std::cin, line)) lineCount++;
std::cout << "file has: " << lineCount << " rows." << '\n';
}
void read_file_cin( std::string &nomeFile ) {
std::ifstream in(nomeFile.c_str());
std::streambuf *cinbuf = std::cin.rdbuf(); //save old buf
std::cin.rdbuf(in.rdbuf()); //redirect std::cin to in.txt!
file_countline_cin(); //call function
std::cin.rdbuf(cinbuf); //reset to standard input again
}
void populateVector( std::vector<int> &vec, int n ) {
for( int i = 0 ; i < n ; ++i ) vec.push_back( 0 + rand() % (3000 - 1 + 1)); //gli ultimi due num sono upper e lower range limits
sort( vec.begin(), vec.end() );
}
int generateRandInt( int l0, int u0 ) {
int n0 = l0 + rand() % (u0 - l0 + 1);
return n0;
}
然后我尝试通过以下方法加快CIN(https://www.geeksforgeeks.org/fast-io-for-competitive-programming/)的速度:
std::ios_base::sync_with_stdio(false);
std::cin.tie(NULL);
此处为 CIN.SPEED(v2)版本:
void read_file_cin( std::string &nomeFile );
void read_gnomad_index_cin( std::string &nomeFile, char &separator, std::vector<std::string> &vectorName );
void populateVector( std::vector<int> &vec, int n );
int generateRandInt( int l0, int u0 );
std::vector<int> sovrapposizioniVectorsInt( std::vector<int> &v0, std::vector<int> &v1 );
int main() {
std::string pathGnomadChrIndex = "/Volumes/enrico/gnomad/exomes/splitted_5c/ex_1_5c/chr_1/chr_1.txt";
char separator = ',';
std::vector<std::string> vSubfileNames;
read_gnomad_index_cin(pathGnomadChrIndex, separator, vSubfileNames);
srand((unsigned)time(NULL));
int size0 = 10;
std::vector<int> v0;
populateVector(v0, size0);
std::vector<std::string> vSubfileNames2;
for (auto si : v0) vSubfileNames2.push_back(vSubfileNames[si]);
std::string gnomadSplitDir = "/Volumes/enrico/gnomad/exomes/splitted_5c/ex_1_5c/chr_1/";
for ( auto vi : vSubfileNames2 ) {
std::cout << vi << '\n';
std::stringstream streamNomeGnomadChrDir;
streamNomeGnomadChrDir << gnomadSplitDir << vi;
std::string pathSubfile = streamNomeGnomadChrDir.str();
read_file_cin( pathSubfile );
}
}
void first_column_cin( char &separator, std::vector<std::string> &vectorName) {
/* SPEEDUP */
std::ios_base::sync_with_stdio(false);
std::cin.tie(NULL);
/* SPEEDUP */
std::string line;
int lineCount = 0;
while(std::getline(std::cin, line)) {
std::string filename;
std::istringstream iss(line); // string stream
getline(iss, filename, separator); // read first part up to comma, ignore the comma
vectorName.push_back(filename);
lineCount++;
}
}
void read_gnomad_index_cin( std::string &nomeFile, char &separator, std::vector<std::string> &vectorName ) {
/* SPEEDUP */
std::ios_base::sync_with_stdio(false);
std::cin.tie(NULL);
/* SPEEDUP */
std::ifstream in(nomeFile.c_str());
std::streambuf *cinbuf = std::cin.rdbuf(); //save old buf
std::cin.rdbuf(in.rdbuf()); //redirect std::cin to in.txt!
first_column_cin( separator, vectorName ); //call function
std::cin.rdbuf(cinbuf); //reset to standard input again
}
void file_countline_cin() {
/* SPEEDUP */
std::ios_base::sync_with_stdio(false);
std::cin.tie(NULL);
/* SPEEDUP */
std::string line;
int lineCount = 0;
while(std::getline(std::cin, line)) lineCount++;
std::cout << "file has: " << lineCount << " rows." << '\n';
}
void read_file_cin( std::string &nomeFile ) {
/* SPEEDUP */
std::ios_base::sync_with_stdio(false);
std::cin.tie(NULL);
/* SPEEDUP */
std::ifstream in(nomeFile.c_str());
std::streambuf *cinbuf = std::cin.rdbuf(); //save old buf
std::cin.rdbuf(in.rdbuf()); //redirect std::cin to in.txt!
file_countline_cin(); //call function
std::cin.rdbuf(cinbuf); //reset to standard input again
}
然后我尝试使用mmap的版本,将munmapped()和close()文件推送到向量中的所有文件内容,然后返回向量以分析文件(在此测试中仅对换行符进行计算)内容并关闭mmap和文件。
此处 MMAP.VECTOR(v3):
const char* map_file(const char* fname, size_t& length);
void mmap_file( std::string &filename, std::vector<char> &vFile);
void gnomadIndex( std::string &nomeFileIndex,
std::vector<std::string> &vSubfileNames,
std::vector<int> &vSubfileStarts,
std::vector<int> &vSubfileStops,
std::vector<std::vector<int>> &vSubfilePosizioniVector
);
void gnomadSubfileAnalysis( std::string &nomeFile );
void populateVector( std::vector<int> &vec, int n );
int generateRandInt( int l0, int u0 );
int main() {
std::string gnomadSplitDir = "/Volumes/enrico/gnomad/exomes/splitted_5c/";
std::string nomeChr = "1";
std::stringstream streamNomeGnomadChrDir;
streamNomeGnomadChrDir << gnomadSplitDir << "ex_" << nomeChr << "_5c/" << "chr_" << nomeChr << "/";
std::string pathGnomadChrDir = streamNomeGnomadChrDir.str();
std::stringstream streamNomeGnomadChrIndex;
streamNomeGnomadChrIndex << pathGnomadChrDir << "chr_" << nomeChr << ".txt";
std::string pathGnomadChrIndex = streamNomeGnomadChrIndex.str();
std::vector<std::string> vSubfileNames;
std::vector<int> vSubfileStarts;
std::vector<int> vSubfileStops;
std::vector<std::vector<int>> vSubfilePosizioniVector;
gnomadIndex(pathGnomadChrIndex,
vSubfileNames,
vSubfileStarts,
vSubfileStops,
vSubfilePosizioniVector
);
std::vector<std::string> vGnomadSubfilePaths;
srand((unsigned)time(NULL));
int size0 = 10;
std::vector<int> v0;
populateVector(v0, size0);
std::vector<std::string> vSubfileNames2;
for (auto si : v0) vSubfileNames2.push_back(vSubfileNames[si]);
for ( int subCount = 0; subCount < vSubfileNames2.size(); subCount++ ) {
std::stringstream streamNomeSubfileGnomad;
streamNomeSubfileGnomad << pathGnomadChrDir << vSubfileNames2[subCount];
std::string pathGnomadSubfile = streamNomeSubfileGnomad.str();
gnomadSubfileAnalysis(pathGnomadSubfile);
}
}
void gnomadSubfileAnalysis( std::string &nomeFile ) {
std::vector<char> f;
mmap_file(nomeFile, f);
std::vector<int> v0;
for (int i=0; i<f.size(); i++) if (f[i] == '\n') v0.push_back(i);
std::cout << "subfile: " << nomeFile << ", has: " << v0.size() << " rows in: " << f.size() << " bytes." << '\n';
}
void gnomadIndex( std::string &nomeFileIndex,
std::vector<std::string> &vSubfileNames,
std::vector<int> &vSubfileStarts,
std::vector<int> &vSubfileStops,
std::vector<std::vector<int>> &vSubfilePosizioniVector
) {
std::vector<char> f;
mmap_file(nomeFileIndex, f);
std::vector<int> v0;
v0.push_back(0); //la prima riga mi serve qui
for (int i=0; i<f.size(); i++) {
if (f[i] == '\n') v0.push_back(i+1);
}
v0.pop_back();
for (int nl = 0; nl < v0.size(); nl++) {
int ncol = 0;
std::vector<char> vSubfile;
std::vector<char> vSubStart;
std::vector<char> vSubStop;
std::vector<char> vSubPos;
std::vector<int> vSubfilePosVector;
for( int start=v0[nl]; start<v0[nl+1]; start++ ) {
if (f[start] == ',') ncol++;
if (ncol == 0) {
if ( f[start] != ',' ) {
vSubfile.push_back(f[start]);
}
}
if (ncol == 1) {
if ( f[start] != ',' ) {
vSubStart.push_back(f[start]);
}
}
if (ncol == 2) {
if ( f[start] != ',' ) {
vSubStop.push_back(f[start]);
}
}
if (ncol == 3) {
if ( f[start] != ',' ) {
if ( f[start] != ':' ) {
vSubPos.push_back(f[start]);
}
if ( f[start] == ':' || f[start] == '\n' ) {
std::string subfilePosValue ( vSubPos.begin(), vSubPos.end() );
vSubPos.clear();
vSubfilePosVector.push_back( stoi(subfilePosValue) );
subfilePosValue.erase();
}
}
}
}
std::string subFileValue ( vSubfile.begin(), vSubfile.end() );
vSubfile.clear();
std::string subfileStartValue ( vSubStart.begin(), vSubStart.end() );
vSubStart.clear();
std::string subfileStopValue ( vSubStop.begin(), vSubStop.end() );
vSubStop.clear();
vSubfileNames.push_back( subFileValue );
vSubfileStarts.push_back( stoi( subfileStartValue ) );
vSubfileStops.push_back( stoi(subfileStopValue) );
vSubfilePosizioniVector.push_back( vSubfilePosVector );
}
}
void handle_error(const char* msg) {
perror(msg);
exit(255);
}
const char* map_file(const char* fname, size_t& length) {
int fd = open(fname, O_RDONLY);
if (fd == -1)
handle_error("open");
struct stat sb;
if (fstat(fd, &sb) == -1)
handle_error("fstat");
length = sb.st_size;
const char* addr = static_cast<const char*>(mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0u));
if (addr == MAP_FAILED)
handle_error("mmap");
return addr;
}
void populateVector( std::vector<int> &vec, int n ) {
for( int i = 0 ; i < n ; ++i ) vec.push_back( 0 + rand() % (3000 - 1 + 1)); //gli ultimi due num sono upper e lower range limits
sort( vec.begin(), vec.end() );
}
int generateRandInt( int l0, int u0 ) {
int n0 = l0 + rand() % (u0 - l0 + 1);
return n0;
}
size_t getFilesize(const char* filename) {
struct stat st;
stat(filename, &st);
return st.st_size;
}
void mmap_file( std::string &filename, std::vector<char> &vFile) {
size_t filesize = getFilesize(filename.c_str());
int fd = open(filename.c_str(), O_RDONLY, 0);
assert(fd != -1);
void* mmappedData = mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0);
assert(mmappedData != MAP_FAILED);
const char* f = static_cast<const char*>(mmappedData);
for ( int i = 0; i < filesize; i++ ) vFile.push_back(f[i]);
int rc = munmap(mmappedData, filesize);
assert(rc == 0);
close(fd);
}
但是我立即看到它变慢了,我认为这可能是由于进一步填充了向量,所以我尝试了一个在mmap打开时将换行符计数的版本,然后将其关闭。
MMAP.CLOSE(v4):
const char* map_file(const char* fname, size_t& length);
void mmap_file( std::string &filename, std::vector<char> &vFile);
int mmap_file_nlines( std::string &filename );
void gnomadIndex( std::string &nomeFileIndex,
std::vector<std::string> &vSubfileNames,
std::vector<int> &vSubfileStarts,
std::vector<int> &vSubfileStops,
std::vector<std::vector<int>> &vSubfilePosizioniVector
);
void gnomadSubfileAnalysis( std::string &nomeFile );
void populateVector( std::vector<int> &vec, int n );
int generateRandInt( int l0, int u0 );
int main() {
std::string gnomadSplitDir = "/Volumes/enrico/gnomad/exomes/splitted_5c_text/";
std::string nomeChr = "1";
std::stringstream streamNomeGnomadChrDir;
streamNomeGnomadChrDir << gnomadSplitDir << "chr_" << nomeChr << "/";
std::string pathGnomadChrDir = streamNomeGnomadChrDir.str();
std::stringstream streamNomeGnomadChrIndex;
streamNomeGnomadChrIndex << pathGnomadChrDir << "chr_" << nomeChr << ".txt";
std::string pathGnomadChrIndex = streamNomeGnomadChrIndex.str();
std::vector<std::string> vSubfileNames;
std::vector<int> vSubfileStarts;
std::vector<int> vSubfileStops;
std::vector<std::vector<int>> vSubfilePosizioniVector;
gnomadIndex(pathGnomadChrIndex,
vSubfileNames,
vSubfileStarts,
vSubfileStops,
vSubfilePosizioniVector
);
std::vector<std::string> vGnomadSubfilePaths;
srand((unsigned)time(NULL));
int size0 = 10;
std::vector<int> v0;
populateVector(v0, size0);
std::vector<std::string> vSubfileNames2;
for (auto si : v0) vSubfileNames2.push_back(vSubfileNames[si]);
for ( int subCount = 0; subCount < vSubfileNames2.size(); subCount++ ) {
std::stringstream streamNomeSubfileGnomad;
streamNomeSubfileGnomad << pathGnomadChrDir << vSubfileNames2[subCount];
std::string pathGnomadSubfile = streamNomeSubfileGnomad.str();
gnomadSubfileAnalysis(pathGnomadSubfile);
}
}
void gnomadSubfileAnalysis( std::string &nomeFile ) {
int nLinee = mmap_file_nlines(nomeFile);
std::cout << "subfile: " << nomeFile << ", has: " << nLinee << " rows" << '\n';
}
void gnomadIndex( std::string &nomeFileIndex,
std::vector<std::string> &vSubfileNames,
std::vector<int> &vSubfileStarts,
std::vector<int> &vSubfileStops,
std::vector<std::vector<int>> &vSubfilePosizioniVector
) {
size_t length;
auto f = map_file(nomeFileIndex.c_str(), length);
auto l = f + length;
std::vector<int> v0;
v0.push_back(0); //la prima riga mi serve qui
for (int i=0; i<length; i++) {
if (f[i] == '\n') v0.push_back(i+1);
}
v0.pop_back();
for (int nl = 0; nl < v0.size(); nl++) {
int ncol = 0;
std::vector<char> vSubfile;
std::vector<char> vSubStart;
std::vector<char> vSubStop;
std::vector<char> vSubPos;
std::vector<int> vSubfilePosVector;
for( int start=v0[nl]; start<v0[nl+1]; start++ ) {
if (f[start] == ',') ncol++;
if (ncol == 0) {
if ( f[start] != ',' ) {
vSubfile.push_back(f[start]);
}
}
if (ncol == 1) {
if ( f[start] != ',' ) {
vSubStart.push_back(f[start]);
}
}
if (ncol == 2) {
if ( f[start] != ',' ) {
vSubStop.push_back(f[start]);
}
}
if (ncol == 3) {
if ( f[start] != ',' ) {
if ( f[start] != ':' ) {
vSubPos.push_back(f[start]);
}
if ( f[start] == ':' || f[start] == '\n' ) {
std::string subfilePosValue ( vSubPos.begin(), vSubPos.end() );
vSubPos.clear();
vSubfilePosVector.push_back( stoi(subfilePosValue) );
subfilePosValue.erase();
}
}
}
}
std::string subFileValue ( vSubfile.begin(), vSubfile.end() );
vSubfile.clear();
std::string subfileStartValue ( vSubStart.begin(), vSubStart.end() );
vSubStart.clear();
std::string subfileStopValue ( vSubStop.begin(), vSubStop.end() );
vSubStop.clear();
vSubfileNames.push_back( subFileValue );
vSubfileStarts.push_back( stoi( subfileStartValue ) );
vSubfileStops.push_back( stoi(subfileStopValue) );
vSubfilePosizioniVector.push_back( vSubfilePosVector );
}
}
void handle_error(const char* msg) {
perror(msg);
exit(255);
}
const char* map_file(const char* fname, size_t& length) {
int fd = open(fname, O_RDONLY);
if (fd == -1)
handle_error("open");
struct stat sb;
if (fstat(fd, &sb) == -1)
handle_error("fstat");
length = sb.st_size;
const char* addr = static_cast<const char*>(mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0u));
if (addr == MAP_FAILED)
handle_error("mmap");
return addr;
}
void populateVector( std::vector<int> &vec, int n ) {
for( int i = 0 ; i < n ; ++i ) vec.push_back( 0 + rand() % (3000 - 1 + 1)); //gli ultimi due num sono upper e lower range limits
sort( vec.begin(), vec.end() );
}
int generateRandInt( int l0, int u0 ) {
int n0 = l0 + rand() % (u0 - l0 + 1);
return n0;
}
size_t getFilesize(const char* filename) {
struct stat st;
stat(filename, &st);
return st.st_size;
}
void mmap_file( std::string &filename, std::vector<char> &vFile) {
size_t filesize = getFilesize(filename.c_str());
int fd = open(filename.c_str(), O_RDONLY, 0);
assert(fd != -1);
void* mmappedData = mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0);
assert(mmappedData != MAP_FAILED);
const char* f = static_cast<const char*>(mmappedData);
for ( int i = 0; i < filesize; i++ ) vFile.push_back(f[i]);
int rc = munmap(mmappedData, filesize);
assert(rc == 0);
close(fd);
}
int mmap_file_nlines( std::string &filename ) {
size_t filesize = getFilesize(filename.c_str());
int fd = open(filename.c_str(), O_RDONLY, 0);
assert(fd != -1);
void* mmappedData = mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0);
assert(mmappedData != MAP_FAILED);
const char* f = static_cast<const char*>(mmappedData);
int lineCount =0 ;
for ( int i = 0; i < filesize; i++ ) if( f[i] == '\n' ) lineCount++;
int rc = munmap(mmappedData, filesize);
assert(rc == 0);
close(fd);
return lineCount;
}
以下是每种方法进行1000次尝试的秒数:
cin 0.983380
cin.speed 0.989011
mmap.close 2.863860
mmap.open 0.915395
mmap.vector 4.683976
某种程度上,我感到惊讶的是CIN的“加速”没有改变,(也许我出了什么问题?)并且关闭文件对于mmap是如此耗时!
这里是结果图。
真的欢迎更多的专家发表评论!
PS:我的机器是 iMac(Retina 5K,27英寸,2014年末),4 GHz Intel Core i7、16 GB 1600 MHz DDR3