Question

我的代码有问题。我需要对具有以下结构的日志文件执行多个操作：

190.12.1.100 2011-03-02 12:12 test.html  
190.12.1.100 2011-03-03 13:18 data.html  
128.33.100.1 2011-03-03 15:25 test.html  
128.33.100.1 2011-03-04 18:30 info.html

我需要根据IP获取每月访问次数，每页访问次数和唯一身份访问者数量。这不是问题，我设法让所有三个操作都工作。问题是，只有第一个选项正确运行而其他选项之后只返回值0，就像文件是空的一样，所以我猜我在某个地方犯了I / O错误。这是代码：

import java.io.*;
import java.util.*;

public class WebServerAnalyzer {

private Map<String, Integer> hm1;
private Map<String, Integer> hm2;
private int[] months;
private Scanner input;

public WebServerAnalyzer() throws IOException {
  hm1 = new HashMap<String, Integer>();
  hm2 = new HashMap<String, Integer>();
  months = new int[12];
  for (int i = 0; i < 12; i++) {
      months[i] = 0;
  }
  File file = new File("webserver.log");
  try {
      input = new Scanner(file);
  } catch (FileNotFoundException fne) {
      input = null;
  }
}

public String nextLine() {
  String line = null;
  if (input != null && input.hasNextLine()) {
    line = input.nextLine();
  }
  return line;
}

public int getMonth(String line) {
  StringTokenizer tok = new StringTokenizer(line);
  if (tok.countTokens() == 4) {
    String ip = tok.nextToken();
    String date = tok.nextToken();
    String hour = tok.nextToken();
    String page = tok.nextToken();
    StringTokenizer dtok = new StringTokenizer(date, "-");
    if (dtok.countTokens() == 3) {
      String year = dtok.nextToken();
      String month = dtok.nextToken();
      String day = dtok.nextToken();
      int m = Integer.parseInt(month);
        return m;
    }
  }
  return -1;
}

public String getIP(String line) {
  StringTokenizer tok = new StringTokenizer(line);
  if (tok.countTokens() == 4) {
    String ip = tok.nextToken();
    String date = tok.nextToken();
    String hour = tok.nextToken();
    String page = tok.nextToken();
    StringTokenizer dtok = new StringTokenizer(date, "-");
      return ip;
  }
  return null;
}

public String getPage(String line) {
  StringTokenizer tok = new StringTokenizer(line);
  if (tok.countTokens() == 4) {
    String ip = tok.nextToken();
    String date = tok.nextToken();
    String hour = tok.nextToken();
    String page = tok.nextToken();
    StringTokenizer dtok = new StringTokenizer(date, "-");
      return page;
  }
  return null;
}

public void visitsPerMonth() {
  String line = null;
  do {
    line = nextLine();
    if (line != null) {
      int m = getMonth(line);
      if (m != -1) {
        months[m - 1]++;
      }
    }
  } while (line != null);

  // Print the result
  String[] monthName = {"JAN ", "FEB ", "MAR ",
      "APR ", "MAY ", "JUN ", "JUL ", "AUG ", "SEP ",
      "OCT ", "NOV ", "DEC "};
  for (int i = 0; i < 12; i++) {
    System.out.println(monthName[i] + months[i]);
  }
}

public int count() throws IOException {
  InputStream is = new BufferedInputStream(new FileInputStream("webserver.log"));
  try {
    byte[] c = new byte[1024];
    int count = 0;
    int readChars = 0;
    while ((readChars = is.read(c)) != -1) {
      for (int i = 0; i < readChars; ++i) {
        if (c[i] == '\n')
          ++count;
      }
    }
    return count;
  } finally {
    is.close();
  }
}


public void UniqueIP() throws IOException{
  String line = null;
  for (int x = 0; x <count(); x++){
    line = nextLine();
    if (line != null) {
      if(hm1.containsKey(getIP(line)) == false) {
        hm1.put(getIP(line), 1);
      } else {
        hm1.put(getIP(line), hm1.get(getIP(line)) +1 );
      }
    }
  }

  Set set = hm1.entrySet();
  Iterator i = set.iterator();
  System.out.println("\nNumber of unique visitors: " + hm1.size());
  while(i.hasNext()) {
    Map.Entry me = (Map.Entry)i.next();
    System.out.print(me.getKey() + " - ");
    System.out.println(me.getValue() + " visits");
  }
}

public void pageVisits() throws IOException{
  String line = null;
  for (int x = 0; x <count(); x++){
    line = nextLine();
    if (line != null) {
      if(hm2.containsKey(getPage(line)) == false)
        hm2.put(getPage(line), 1);
      else
        hm2.put(getPage(line), hm2.get(getPage(line)) +1 );
    }
  }
  Set set = hm2.entrySet();
  Iterator i = set.iterator();
  System.out.println("\nNumber of pages visited: " + hm2.size());
  while(i.hasNext()) {
    Map.Entry me = (Map.Entry)i.next();
    System.out.print(me.getKey() + " - ");
    System.out.println(me.getValue() + " visits");
  }
}

任何帮助解决问题的人都会非常感激，因为我很困惑。

Answer 1

我还没有彻底阅读代码，但我猜你在开始新操作时没有将读取位置设置回文件的开头。因此nextLine()将返回null。

您应该为每个操作创建一个新扫描仪，然后关闭它。 AFAIK扫描仪不提供返回第一个字节的方法。

目前我还可以考虑3种选择：

使用BufferedReader并为每个新操作调用reset()。如果您没有在某处调用mark()，这应该会使读者返回到字节0。
读取文件内容一次并遍历内存中的行，即将所有行放入List<String>，然后从每行开始。
读取文件一次，解析每一行并构建包含所需数据的适当数据结构。例如，您可以使用TreeMap<Date, Map<Page, Map<IPAdress, List<Visit>>>>，即每个日期存储每页每个IP地址的访问次数。然后，您可以按日期，页面和IP地址选择适当的子图。

Answer 2

建议reset的{{1}} BufferedReader方法只有在文件大小小于缓冲区大小或者调用Thomas且读取时间足够大时才会起作用超前限制。

我建议您阅读该文件一次并更新每行的地图和月份数组。顺便说一句，你不需要扫描器来读取行，BufferedReader本身就有readLine方法。

BufferedReader br = ...;
String line;
while (null != (line = br.readLine())) {
    String ip = getIP(line);
    String page = getPage(line);
    int month = getMonth(line);
    // update hashmaps and arrays
}

Java文件I / O帮助

2 个答案: