显示数据集中的字节数

时间:2015-04-27 17:38:12

标签: ruby byte

我编写了一个程序来创建IP地址,URL,错误代码和IP访问频率的直方图,并希望获得为每个直方图收集的数据的大小(以字节为单位)。我环顾四周,看了一下bytes方法,但似乎无法使其正常运行。

对于下面这段代码,有什么办法可以做到这一点吗?我想在每个方法输出文件名的那一行之后,再加一行 puts 来显示字节数。

# Reads a web-server access log and prints simple reports about it:
# IP addresses, requested URLs, requests per hour and the relative
# frequency of a fixed set of HTTP status codes.
#
# The log is broken into whitespace-separated tokens; every report works on
# that flat token list. Each report ends by printing the log's base name.
class CommonLog
  # Token containing something shaped like a dotted-quad IPv4 address.
  IP_PATTERN = /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/

  # Token containing "/" followed by at least four lowercase letters or "~".
  URL_PATTERN = %r{/(?:[a-z]{4,}|~)\S*}

  # Token containing a two-digit field enclosed in colons (e.g. a timestamp).
  TIME_PATTERN = /:\d{2}:/

  # Status codes included in the #sorted_list report.
  STATUS_CODES = %w[200 301 304 403].freeze

  # @param logfile [String] path of the log file to analyse
  def initialize(logfile)
    @logfile = logfile
  end

  # Loads the log and splits every line on whitespace.
  #
  # The tokens are kept verbatim. Round-tripping the nested array through
  # Array#to_s (as an earlier version did) wraps each token in quotes,
  # commas and brackets, which breaks exact comparisons such as
  # token == "200".
  #
  # @return [Array<String>] all tokens of the file, in file order
  def readfile
    @readfile = File.readlines(@logfile).flat_map(&:split)
  end

  # Prints every token that looks like an IP address, each followed by a
  # running count, then the log's base name.
  def ip_histogram
    print_numbered(tokens.grep(IP_PATTERN))
  end

  # Prints every token that looks like a URL path, each followed by a
  # running count, then the log's base name.
  def url_histogram
    print_numbered(tokens.grep(URL_PATTERN))
  end

  # Prints how many time-like tokens fall into each requested hour, then the
  # log's base name. The hour is the text between the first and second colon
  # of the token.
  #
  # @param hours [Enumerable<Integer>] hours to report; defaults to 2..6
  def requests_per_hour(hours = (2..6))
    per_hour = Hash.new(0)
    tokens.grep(TIME_PATTERN).each { |stamp| per_hour[stamp.split(':')[1]] += 1 }

    hours.each do |hour|
      # Hours appear zero-padded in the log ("02"), so pad before the lookup.
      puts "#{per_hour[format('%02d', hour)]} instances during hour #{hour}"
    end
    puts File.basename(@logfile)
  end

  # Prints, in ascending code order, the share of each status code in
  # STATUS_CODES among all tokens that are exactly one of those codes, then
  # the log's base name. Nothing but the base name is printed when no such
  # token exists.
  #
  # NOTE(review): matching is per token, so any other field whose text is
  # exactly one of the codes (e.g. a response size of "200") is counted too.
  def sorted_list
    frequencies = Hash.new(0)
    tokens.each { |token| frequencies[token] += 1 if STATUS_CODES.include?(token) }
    total = frequencies.values.inject(0, :+)

    frequencies.sort.each do |code, count|
      puts "Error #{code} : #{(count.to_f / total) * 100}%"
    end
    puts File.basename(@logfile)
  end

  private

  # Tokens of the log, loading the file on first use so that the reports
  # also work when #readfile was not called explicitly.
  def tokens
    @readfile || readfile
  end

  # Prints each match followed by its 1-based position, then the file name.
  def print_numbered(matches)
    matches.each_with_index do |match, position|
      puts match
      puts position + 1
    end
    puts File.basename(@logfile)
  end
end

# Run every report, in order, against the log file named "test_log" in the
# current working directory.
report = CommonLog.new("test_log")
report.readfile
%i[ip_histogram url_histogram requests_per_hour sorted_list].each do |histogram|
  report.public_send(histogram)
end

1 个答案:

答案 0 :(得分:1)

假设要显示的字节数就是每个日志文件的总大小,那么可以这样做:

class CommonLog

  attr_reader :bytes_read

  # Remembers the log path and records the file's size in bytes.
  # File.size raises if the file cannot be found.
  #
  # @param logfile [String] path of the log file
  def initialize(logfile)
    @bytes_read = File.size(logfile)
    @logfile = logfile
  end

  # ... now simply print "bytes_read" when desired ...