已经读取了文件的多少内容

时间:2015-04-09 17:21:13

标签: c++ c++11 stream

我有一个程序,它读取一个 10 MB 的文件,以 4K 为一块读取数据并随即处理。测试通常需要 1 到 2 分钟。但在某些情况下,程序运行会超过 10 分钟,此时测试会被强制终止并生成核心转储(core dump)。以下是读取文件的代码:

    string filename("data.out");
    ifstream ifs;
    vector<char> buf(4096);

    ifs.open(filename,  ios::in | ios::binary);
    if (!ifs.is_open()) {
            cout << "ERROR : " << filename << "can't be opened." << endl;
            VERIFY(ifs.is_open());
    }

    while (!ifs.eof()) {
            ifs.read(buf.data(), buf.size());     <======== Line 1
            process_data (buf.data(), ifs.gcount());   <======== Line 2
    }
    ifs.close();

我有两个核心转储,分别显示程序卡在第1行和第2行。

core1(卡在第1行)的回溯(bt)栈顶:

#0  0x00007f942a462175 in std::istream::read (this=0x7fff4ce69de0,
__s=0x9120000 "\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324\324"..., __n=4096) at /home/packages/gcc/4.7/w/gcc-4.7-4.7.2/build/x86_64-linux-gnu/libstdc++-v3/include/bits/istream.tcc:651

core2(卡在第2行)的回溯(bt)栈顶:

#0  0x00000000004375f3 in std::__addressof<char> (__r=@0x7fa3176391a6: -128 '\200') at /usr/include/c++/4.7/bits/move.h:47
#1  0x0000000000436cd4 in std::vector<char, std::allocator<char> >::data (this=0x7fff346ad770)
at /usr/include/c++/4.7/bits/stl_vector.h:859

最初,根据 core1,我认为问题在于 ifs.read() 耗时过长。但在看到第二个核心转储之后,我认为这个问题可能与 vector::data() 有关。

通过检查 ifstream 中保存的某些字段(例如文件偏移量),我能否判断文件是否已有一部分被读取?

我不喜欢贴出大型结构的转储,但希望有人能解释一下,如何从下面这个转储中看出这 10MB 中已经读取了多少。

(gdb) p ifs
$3 = warning: can't find linker symbol for virtual table for `std::basic_ifstream<char, std::char_traits<char> >' value
{
  <std::basic_istream<char, std::char_traits<char> >> = {
    <std::basic_ios<char, std::char_traits<char> >> = {
      <std::ios_base> = {
        _vptr.ios_base = 0xfbfcc0,
        static boolalpha = std::_S_boolalpha,
        static dec = std::_S_dec,
        static fixed = std::_S_fixed,
        static hex = std::_S_hex,
        static internal = std::_S_internal,
        static left = std::_S_left,
        static oct = std::_S_oct,
        static right = std::_S_right,
        static scientific = std::_S_scientific,
        static showbase = std::_S_showbase,
        static showpoint = std::_S_showpoint,
        static showpos = std::_S_showpos,
        static skipws = std::_S_skipws,
        static unitbuf = std::_S_unitbuf,
        static uppercase = std::_S_uppercase,
        static adjustfield = std::_S_adjustfield,
        static basefield = std::_S_basefield,
        static floatfield = std::_S_floatfield,
        static badbit = std::_S_badbit,
        static eofbit = std::_S_eofbit,
        static failbit = std::_S_failbit,
        static goodbit = std::_S_goodbit,
        static app = std::_S_app,
        static ate = std::_S_ate,
        static binary = std::_S_bin,
        static in = std::_S_in,
        static out = std::_S_out,
        static trunc = std::_S_trunc,
        static beg = std::_S_beg,
        static cur = std::_S_cur,
        static end = std::_S_end,
        _M_precision = 6,
        _M_width = 0,
        _M_flags = 4098,
        _M_exception = std::_S_goodbit,
        _M_streambuf_state = 5,
        _M_callbacks = 0x0,
        _M_word_zero = {
          _M_pword = 0x0,
          _M_iword = 0
        },
        _M_local_word = {{
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }, {
            _M_pword = 0x0,
            _M_iword = 0
          }},
        _M_word_size = 8,
        _M_word = 0x7fff4ce69f20,
        _M_ios_locale = {
          static none = 0,
          static ctype = 1,
          static numeric = 2,
          static collate = 4,
          static time = 8,
          static monetary = 16,
          static messages = 32,
          static all = 63,
          _M_impl = 0x7f942a6e3aa0,
          static _S_classic = 0x7f942a6e3aa0,
          static _S_global = 0x7f942a6e3aa0,
          static _S_categories = 0x7f942a6c86a0,
          static _S_once = 2
        }
      },
      members of std::basic_ios<char, std::char_traits<char> >:
      _M_tie = 0x0,
      _M_fill = 0 '\000',
      _M_fill_init = false,
      _M_streambuf = 0x7fff4ce69df0,
      _M_ctype = 0x7f942a6e3d20,
      _M_num_put = 0x7f942a6e4040,
      _M_num_get = 0x7f942a6e4030
    },
    members of std::basic_istream<char, std::char_traits<char> >:
    _vptr.basic_istream = 0xfbfc98,
    _M_gcount = 0
  },
  members of std::basic_ifstream<char, std::char_traits<char> >:
  _M_filebuf = warning: can't find linker symbol for virtual table for `std::basic_filebuf<char, std::char_traits<char> >' value
{
    <std::basic_streambuf<char, std::char_traits<char> >> = {
      _vptr.basic_streambuf = 0xfc0a70,
      _M_in_beg = 0x6306000 "\317\317\317\......320\320\320\320"...,
      _M_in_cur = 0x6307fff "",
      _M_in_end = 0x6307fff "",
      _M_out_beg = 0x0,
      _M_out_cur = 0x0,
      _M_out_end = 0x0,
      _M_buf_locale = {
        static none = 0,
        static ctype = 1,
        static numeric = 2,
        static collate = 4,
        static time = 8,
        static monetary = 16,
        static messages = 32,
        static all = 63,
        _M_impl = 0x7f942a6e3aa0,
        static _S_classic = 0x7f942a6e3aa0,
        static _S_global = 0x7f942a6e3aa0,
        static _S_categories = 0x7f942a6c86a0,
        static _S_once = 2
      }
    },
    members of std::basic_filebuf<char, std::char_traits<char> >:
    _M_lock = {
      __data = {
        __lock = 0,
        __count = 0,
        __owner = 0,
        __nusers = 0,
        __kind = 0,
        __spins = 0,
        __list = {
          __prev = 0x0,
          __next = 0x0
        }
      },
      __size = '\000' <repeats 39 times>,
      __align = 0
    },
    _M_file = {
      _M_cfile = 0x70186c0,
      _M_cfile_created = true
    },
    _M_mode = 12,
    _M_state_beg = {
      __count = 0,
      __value = {
        __wch = 0,
        __wchb = "\000\000\000"
      }
    },
    _M_state_cur = {
      __count = 0,
      __value = {
        __wch = 0,
        __wchb = "\000\000\000"
      }
    },
    _M_state_last = {
      __count = 0,
      __value = {
        __wch = 0,
        __wchb = "\000\000\000"
      }
    },
    _M_buf = 0x6306000 "\317\317\317\317\317\......320\320\320\320\320"...,
    _M_buf_size = 8192,
    _M_buf_allocated = true,
    _M_reading = true,
    _M_writing = false,
    _M_pback = 0 '\000',
    _M_pback_cur_save = 0x0,
    _M_pback_end_save = 0x0,
    _M_pback_init = false,
    _M_codecvt = 0x7f942a6e3f60,
    _M_ext_buf = 0x0,
    _M_ext_buf_size = 0,
    _M_ext_next = 0x0,
    _M_ext_end = 0x0
  }
}
(gdb)

谢谢。
——艾哈迈德

1 个答案:

答案 0 :(得分:0)

不要用 eof() 作为循环条件。

while (ifs.read(buf.data(), buf.size())) {
  size_t read = ifs.gcount();
  if(read==0) break; // don't trust passing `0` to `process_data`:
  process_data(buf.data(), read);
  if (read<buf.size()) break; // if we finished, end.
}

判断输入是否结束的最好办法,是先尝试执行 IO 操作,再检查它是否出错。在这里,我们先读取,再统计实际读到的字节数;当读到 0 个字节,或读到的字节数少于预期时,就认为已经没有更多数据了。

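如果 IO 操作在 ifs 上设置了 failbit 或 badbit,循环同样会结束。需要注意:当最后一次 read() 读到的字节数不足 buf.size() 时,流会同时设置 eofbit 和 failbit,循环条件随即为假,循环体不会再执行;因此文件末尾不足一整块的数据需要在循环结束后根据 ifs.gcount() 另行处理。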