Question

我正在尝试从文本文件导入数据并将其分配给变量，以便我可以使用函数对其进行分析。数据采用以下格式：

Run 141544 event 5
Njets 0
m1: pt,eta,phi,m= 231.277 0.496237 -2.22082 0.1 dptinv: 0.000370146
m2: pt,eta,phi,m= 222.408 -0.198471 0.942319 0.1 dptinv: 0.00038302

Run 141544 event 7
Njets 1
m1: pt,eta,phi,m= 281.327 -0.489914 1.12498 0.1 dptinv: 0.000406393
m2: pt,eta,phi,m= 238.38 0.128715 -2.07527 0.1 dptinv: 0.000399279

... 大约有15000个条目，每个条目有四行。每一行中的值以空格分隔，条目之间有一个空行。由于条目中每一行的格式都不同，我写了一个循环来区分各种情况。我遇到的问题是，给变量赋值的那部分代码似乎有问题。当我只用循环输出某一类型的行时，一切运行正常。但是一旦我尝试把每一行拆分成变量、赋值并打印这些变量，程序就会多次打印同一行，然后崩溃。这是我的代码：

#include <iostream>
#include <fstream>
#include <sstream>
#include <cmath>
#include <numeric>
#include <vector>
#include <algorithm>
#include <string>
#include <cstring>
#include <iterator>
using namespace std;
using std::cout;
using std::endl;

// Holds the two values taken from a run-information line; both are kept
// as text exactly as they were split out of the line.
struct rowtype1
{
    std::string runnumber;   // run number token
    std::string eventnumber; // event number token
};

// Holds the four values taken from a muon line (used for both the muon1 and
// the muon2 lines); every value is kept as text, not converted to a number.
struct rowtype2
{
    std::string ptvalue1;   // pt token
    std::string etavalue1;  // eta token
    std::string phivalue1;  // phi token
    std::string massvalue1; // mass token
};

// Every run-information row parsed so far, in the order it was read.
std::vector<rowtype1> row1values;
// Every muon row parsed so far; muon1 and muon2 rows share this container.
std::vector<rowtype2> row2values;

// Reads the muon data file line by line, classifies each line by its first
// characters, splits the recognised lines into whitespace-separated words,
// stores the selected words in row1values / row2values and prints them.
// Returns 0 in all cases, including when the file could not be opened.
int main()
{
    string line;
    ifstream inData;
    inData.open("/Users/Edward/Downloads/muons.txt");

    if (inData.is_open())
    {
        // The inner loop only ends once getline() fails, after which good()
        // is false, so this outer loop body runs at most once.
        while ( inData.good() )
        {
            while (getline(inData,line))
            {
                // NOTE(review): line is indexed below without checking its
                // length. For an empty line, line[0] yields '\0' but line[1]
                // is past the end of the string, which is undefined behaviour.
                if (line[0] == 'N') // skips lines starting with 'N' (the "Njets" lines); blank lines are NOT caught here
                {
                    continue;
                }
                else if (line[1] == 'u') // recognizes lines containing run information
                {
                    // Split the line on whitespace.
                    istringstream ss(line);
                    istream_iterator<string> begin(ss), end;
                    vector<string> words(begin, end);
                    // words[1] and words[3] are the run and event numbers.
                    // NOTE(review): words.size() is not checked before indexing.
                    rowtype1 s { words[1], words[3]};
                    row1values.push_back(s);
                    // This loop prints EVERY row stored so far, not only the
                    // one just added; the loop variable shadows the outer s.
                    for(auto && s : row1values)
                        cout << "run " << s.runnumber << " " << "event " << s.eventnumber << "\n";
                }
                else if (line[1] == '1') // recognizes lines containing muon1 information
                {
                    // Split the line on whitespace.
                    istringstream ss(line);
                    istream_iterator<string> begin(ss), end;
                    vector<string> words(begin, end);
                    // words[2]..words[5] are the pt, eta, phi and mass values.
                    // NOTE(review): words.size() is not checked before indexing.
                    rowtype2 s { words[2], words[3], words[4], words[5] };
                    row2values.push_back(s);
                    // Prints every muon row stored so far (muon1 and muon2
                    // rows alike), each labelled "m1".
                    for(auto && s : row2values)
                        cout << "m1 " << s.ptvalue1 << " " << s.etavalue1 << " " << s.phivalue1 << " " << s.massvalue1 << "\n";
                }
                else if (line[1] == '2') // recognizes lines containing muon2 information
                {
                  // Split the line on whitespace.
                  istringstream ss(line);
                    istream_iterator<string> begin(ss), end;
                    vector<string> words(begin, end);
                    // words[2]..words[5] are the pt, eta, phi and mass values.
                    // NOTE(review): words.size() is not checked before indexing.
                    rowtype2 s { words[2], words[3], words[4], words[5] };
                    row2values.push_back(s);
                    // Prints every muon row stored so far (muon1 and muon2
                    // rows alike), each labelled "m2".
                    for(auto && s : row2values)
                        cout << "m2 " << s.ptvalue1 << " " << s.etavalue1 << " " << s.phivalue1 << " " << s.massvalue1 << "\n";
                }
            }
        }
        inData.close();
    }
    return 0;
};

为了测试变量是否被正确赋值，我让代码输出了它们的值。然而程序并没有按预期逐行遍历并输出各行的变量，实际输出如下：

run 141544 event 5
Run 141544 event 5
m1 231.277 0.496237 -2.22082 0.1
m2 231.277 0.496237 -2.22082 0.1
m2 222.408 -0.198471 0.942319 0.1
run 141544 event 5
run 141544 event 7
Run 141544 event 7
m1 231.277 0.496237 -2.22082 0.1
m1 222.408 -0.198471 0.942319 0.1
m1 281.327 -0.489914 1.12498 0.1
m2 231.277 0.496237 -2.22082 0.1
m2 222.408 -0.198471 0.942319 0.1
m2 281.327 -0.489914 1.12498 0.1
m2 238.38 0.128715 -2.07527 0.1
run 141544 event 5
run 141544 event 7
run 141572 event 2

Answer 1

您的代码存在太多问题，我不会详细讨论。

主要是，我认为您的问题在于没有正确解析文件，导致赋值时变量与数据错位。

为了修复它并使其更加模块化，我直接把它重写成了下面的代码（我没有做任何校验——这部分您可以自己补上。这里假设所有数据都是正确的）：

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <cstdlib>
#include <vector>

// Short names for the container types used throughout the parser.
using ivec = std::vector<int>;          // integer column (runs, events, njets)
using dvec = std::vector<double>;       // floating-point column (muon values)
using svec = std::vector<std::string>;  // raw text lines


// Parses a line laid out as "<word> <int> <word> <int>" (for example
// "Run 141544 event 5").
//
// varstr: the line to parse.
// runs:   receives the first integer.
// events: receives the second integer.
//
// Both values are appended together, and only when the whole line parsed;
// otherwise neither container is modified.
void get_runs_events(std::string const &varstr, ivec &runs, ivec &events) {
  std::istringstream input(varstr);

  std::string run_label;
  std::string event_label;
  int run_number = 0;
  int event_number = 0;

  input >> run_label >> run_number >> event_label >> event_number;
  if (input.fail()) {
    return;  // at least one of the four fields could not be read
  }

  runs.push_back(run_number);
  events.push_back(event_number);
}

// Parses a line laid out as "<word> <int>" (for example "Njets 0").
//
// varstr: the line to parse.
// njets:  receives the integer; left unchanged when the line does not parse.
void get_njets(std::string const &varstr, ivec &njets) {
  std::istringstream input(varstr);

  std::string label;
  int jet_count;

  input >> label >> jet_count;
  if (!input.fail()) {
    njets.push_back(jet_count);
  }
}

// Parses one muon line of the form
//   "<label> pt,eta,phi,m= <pt> <eta> <phi> <m> dptinv: <dptinv>"
// and appends the five numbers to the matching output containers.
//
// varstr: the line to parse.
// pt, eta, phi, m, dptinv: receive one value each.
//
// The update is all-or-nothing: if the "dptinv:" marker is missing or any of
// the five numbers cannot be read, none of the containers is modified. This
// keeps all five containers the same length, so that a common index always
// refers to the same muon. When the line contains no '=', the four leading
// values are read from the start of the line.
void set_m_params(std::string const &varstr, dvec &pt, dvec &eta, dvec &phi, dvec &m, dvec &dptinv) {
  const std::string marker = "dptinv:";

  // Values start right after '='; without '=' they start at offset 0.
  const std::string::size_type eq_pos = varstr.find('=');
  const std::string::size_type values_begin =
      (eq_pos == std::string::npos) ? 0 : eq_pos + 1;

  // Without the marker there is no dptinv value to read. Bail out here rather
  // than computing an offset from npos, which would wrap around and make
  // substr() either throw std::out_of_range or start at an arbitrary place.
  const std::string::size_type marker_pos = varstr.find(marker, values_begin);
  if (marker_pos == std::string::npos) {
    return;
  }

  std::istringstream kinematics(varstr.substr(values_begin, marker_pos - values_begin));
  std::istringstream inverse(varstr.substr(marker_pos + marker.length()));

  double m_pt = 0.0, m_eta = 0.0, m_phi = 0.0, m_m = 0.0, m_dptinv = 0.0;

  // Read everything first, then store; a partial store would desynchronise
  // the containers.
  if ((kinematics >> m_pt >> m_eta >> m_phi >> m_m) && (inverse >> m_dptinv)) {
    pt.push_back(m_pt);
    eta.push_back(m_eta);
    phi.push_back(m_phi);
    m.push_back(m_m);
    dptinv.push_back(m_dptinv);
  }
}

// Reads the file "text", parses every record and prints the parsed values.
//
// The input is expected to consist of five-line groups: a run/event line, an
// Njets line, an m1 line, an m2 line and one separator line that is ignored.
//
// Returns 0 on success. Returns 1 (with a message on stderr) when the file
// cannot be opened, or when the parsed columns do not all hold the same
// number of entries, which means some line was malformed or missing.
int main() {
  std::ifstream ifile("text", std::ifstream::in);
  if (!ifile.is_open()) {
    std::cerr << "error: cannot open input file \"text\"\n";
    return 1;
  }

  ivec runs, events, njets;
  dvec m1_pt, m1_eta, m1_phi, m1_m, m1_dptinv;
  dvec m2_pt, m2_eta, m2_phi, m2_m, m2_dptinv;

  // Why i % 5? Although a record has four data lines, the pattern repeats
  // every FIVE lines because the blank separator line counts as one. A line is
  // therefore classified by its position inside the five-line group, and
  // position 4 (the separator) is ignored.
  std::string temp;
  for (std::size_t i = 0; std::getline(ifile, temp); ++i) {
    switch (i % 5) {
      case 0:
        get_runs_events(temp, runs, events);
        break;
      case 1:
        get_njets(temp, njets);
        break;
      case 2:
        set_m_params(temp, m1_pt, m1_eta, m1_phi, m1_m, m1_dptinv);
        break;
      case 3:
        set_m_params(temp, m2_pt, m2_eta, m2_phi, m2_m, m2_dptinv);
        break;
      default:
        break;  // separator line
    }
  }

  // The helpers silently skip lines they cannot parse, so a malformed or
  // truncated file leaves the columns with different lengths. Indexing them
  // with a common index would then read out of bounds or pair values from
  // different records, so refuse to print in that case.
  const std::size_t records = runs.size();
  const bool aligned =
      events.size() == records && njets.size() == records &&
      m1_pt.size() == records && m1_eta.size() == records &&
      m1_phi.size() == records && m1_m.size() == records &&
      m1_dptinv.size() == records &&
      m2_pt.size() == records && m2_eta.size() == records &&
      m2_phi.size() == records && m2_m.size() == records &&
      m2_dptinv.size() == records;
  if (!aligned) {
    std::cerr << "error: malformed input: parsed columns have different lengths\n";
    return 1;
  }

  // now output the information to see that it is correct
  for (std::size_t r = 0; r < records; ++r) {
    std::cout << runs[r] << " " << events[r] << " " << njets[r] << "\n";
    std::cout << m1_pt[r] << " " << m1_eta[r] << " " << m1_phi[r] << " " << m1_m[r] << " " << m1_dptinv[r] << "\n";
    std::cout << m2_pt[r] << " " << m2_eta[r] << " " << m2_phi[r] << " " << m2_m[r] << " " << m2_dptinv[r] << "\n\n";
  }

  // ifile is closed by its destructor.
  return 0;
}

使用以下数据（在原始数据的基础上略作修改）：

Run 141544 event 5
Njets 0
m1: pt,eta,phi,m= 231.277 0.496237 -2.22082 0.1 dptinv: 0.000370146
m2: pt,eta,phi,m= 222.408 -0.198471 0.942319 0.1 dptinv: 0.00038302

Run 141545 event 7
Njets 1
m1: pt,eta,phi,m= 281.327 -0.489914 1.12498 0.1 dptinv: 0.000406393
m2: pt,eta,phi,m= 238.38 0.128715 -2.07527 0.1 dptinv: 0.00039927

Run 141546 event 5
Njets 0
m1: pt,eta,phi,m= 231.277 0.496237 -2.22082 0.1 dptinv: 0.000370146
m2: pt,eta,phi,m= 222.408 -0.198471 0.942319 0.1 dptinv: 0.00038302

Run 141547 event 7
Njets 1
m1: pt,eta,phi,m= 281.327 -0.489914 1.12498 0.1 dptinv: 0.000406393
m2: pt,eta,phi,m= 238.38 0.128715 -2.07527 0.1 dptinv: 0.00039927

您将得到按以下顺序排列的正确结果：

第1行： {run} {event} {njet}
第2行： {m1 pt} {m1 eta} {m1 phi} {m1 m} {m1 dptinv}
第3行： {m2 pt} {m2 eta} {m2 phi} {m2 m} {m2 dptinv}

以下输出：

141544 5 0
231.277 0.496237 -2.22082 0.1 0.000370146
222.408 -0.198471 0.942319 0.1 0.00038302

141545 7 1
281.327 -0.489914 1.12498 0.1 0.000406393
238.38 0.128715 -2.07527 0.1 0.00039927

141546 5 0
231.277 0.496237 -2.22082 0.1 0.000370146
222.408 -0.198471 0.942319 0.1 0.00038302

141547 7 1
281.327 -0.489914 1.12498 0.1 0.000406393
238.38 0.128715 -2.07527 0.1 0.00039927

Answer 2

您用 if .. else if .. 来区分各种“类型”的行，却又在每个 if 块的末尾加了 continue。二者择一即可：要么用 if .. continue（我认为这种更好），要么用一整条 if .. else if .. 链。

无论如何，您说的是 run x 这类行被重复输出吗？代码中有一行

cout << line;

位于处理“Run”行的分支末尾。这就是您遇到的问题吗？

Answer 3

您的一个问题是，在每次迭代中访问line[0]和line[1]之前，您都没有检查line是否为空。数据文件中的空行会使line为空，因此使用operator[]访问它将产生未定义的行为（see here）。

因此，主循环内部的第一个检查应该从

if (line[0] == 'N') { continue; }

改为

if (line.empty() || line[0] == 'N') { continue; }

在line和words上包含额外的验证检查是明智的，以确保避免未定义的行为。例如，您应该确保line的长度至少为2，并确保生成的words向量也是预期长度。

除了这些问题，你会看到重复的行有两个原因：

在 line[1] == 'u' 的分支中，您直接打印了整行：

cout << line;

当进入rowtype1或rowtype2的分支时，您遍历了整个row1values或row2values，把到目前为止累积的所有行都打印了出来。

从rowtype1分支中删除cout << line;。然后删除每个分支中的循环语句for(auto && s : ... )（但保留其循环体！），这样就只会打印刚从文件中读取的当前s的相关值。

根据从文本文件导入的不同类型的行创建变量

3 个答案: