我的代码有问题。我需要对具有以下结构的日志文件执行多个操作:
190.12.1.100 2011-03-02 12:12 test.html
190.12.1.100 2011-03-03 13:18 data.html
128.33.100.1 2011-03-03 15:25 test.html
128.33.100.1 2011-03-04 18:30 info.html
我需要根据IP获取每月访问次数,每页访问次数和唯一身份访问者数量。这不是问题,我设法让所有三个操作都工作。问题是,只有第一个选项正确运行而其他选项之后只返回值0,就像文件是空的一样,所以我猜我在某个地方犯了I / O错误。这是代码:
import java.io.*;
import java.util.*;
public class WebServerAnalyzer {
private Map<String, Integer> hm1;
private Map<String, Integer> hm2;
private int[] months;
private Scanner input;
public WebServerAnalyzer() throws IOException {
hm1 = new HashMap<String, Integer>();
hm2 = new HashMap<String, Integer>();
months = new int[12];
for (int i = 0; i < 12; i++) {
months[i] = 0;
}
File file = new File("webserver.log");
try {
input = new Scanner(file);
} catch (FileNotFoundException fne) {
input = null;
}
}
public String nextLine() {
String line = null;
if (input != null && input.hasNextLine()) {
line = input.nextLine();
}
return line;
}
public int getMonth(String line) {
StringTokenizer tok = new StringTokenizer(line);
if (tok.countTokens() == 4) {
String ip = tok.nextToken();
String date = tok.nextToken();
String hour = tok.nextToken();
String page = tok.nextToken();
StringTokenizer dtok = new StringTokenizer(date, "-");
if (dtok.countTokens() == 3) {
String year = dtok.nextToken();
String month = dtok.nextToken();
String day = dtok.nextToken();
int m = Integer.parseInt(month);
return m;
}
}
return -1;
}
public String getIP(String line) {
StringTokenizer tok = new StringTokenizer(line);
if (tok.countTokens() == 4) {
String ip = tok.nextToken();
String date = tok.nextToken();
String hour = tok.nextToken();
String page = tok.nextToken();
StringTokenizer dtok = new StringTokenizer(date, "-");
return ip;
}
return null;
}
public String getPage(String line) {
StringTokenizer tok = new StringTokenizer(line);
if (tok.countTokens() == 4) {
String ip = tok.nextToken();
String date = tok.nextToken();
String hour = tok.nextToken();
String page = tok.nextToken();
StringTokenizer dtok = new StringTokenizer(date, "-");
return page;
}
return null;
}
public void visitsPerMonth() {
String line = null;
do {
line = nextLine();
if (line != null) {
int m = getMonth(line);
if (m != -1) {
months[m - 1]++;
}
}
} while (line != null);
// Print the result
String[] monthName = {"JAN ", "FEB ", "MAR ",
"APR ", "MAY ", "JUN ", "JUL ", "AUG ", "SEP ",
"OCT ", "NOV ", "DEC "};
for (int i = 0; i < 12; i++) {
System.out.println(monthName[i] + months[i]);
}
}
public int count() throws IOException {
InputStream is = new BufferedInputStream(new FileInputStream("webserver.log"));
try {
byte[] c = new byte[1024];
int count = 0;
int readChars = 0;
while ((readChars = is.read(c)) != -1) {
for (int i = 0; i < readChars; ++i) {
if (c[i] == '\n')
++count;
}
}
return count;
} finally {
is.close();
}
}
public void UniqueIP() throws IOException{
String line = null;
for (int x = 0; x <count(); x++){
line = nextLine();
if (line != null) {
if(hm1.containsKey(getIP(line)) == false) {
hm1.put(getIP(line), 1);
} else {
hm1.put(getIP(line), hm1.get(getIP(line)) +1 );
}
}
}
Set set = hm1.entrySet();
Iterator i = set.iterator();
System.out.println("\nNumber of unique visitors: " + hm1.size());
while(i.hasNext()) {
Map.Entry me = (Map.Entry)i.next();
System.out.print(me.getKey() + " - ");
System.out.println(me.getValue() + " visits");
}
}
public void pageVisits() throws IOException{
String line = null;
for (int x = 0; x <count(); x++){
line = nextLine();
if (line != null) {
if(hm2.containsKey(getPage(line)) == false)
hm2.put(getPage(line), 1);
else
hm2.put(getPage(line), hm2.get(getPage(line)) +1 );
}
}
Set set = hm2.entrySet();
Iterator i = set.iterator();
System.out.println("\nNumber of pages visited: " + hm2.size());
while(i.hasNext()) {
Map.Entry me = (Map.Entry)i.next();
System.out.print(me.getKey() + " - ");
System.out.println(me.getValue() + " visits");
}
}
任何帮助解决问题的人都会非常感激,因为我很困惑。
答案 0 :(得分:4)
我还没有彻底阅读代码,但我猜你在开始新操作时没有将读取位置设置回文件的开头。因此nextLine()
将返回null。
您应该为每个操作创建一个新扫描仪,然后关闭它。 AFAIK扫描仪不提供返回第一个字节的方法。
目前我还可以考虑3种选择:
使用BufferedReader
并为每个新操作调用reset()
。如果您没有在某处调用mark()
,这应该会使读者返回到字节0。
读取文件内容一次并遍历内存中的行,即将所有行放入List<String>
,然后从每行开始。
读取文件一次,解析每一行并构建包含所需数据的适当数据结构。例如,您可以使用TreeMap<Date, Map<Page, Map<IPAdress, List<Visit>>>>
,即每个日期存储每页每个IP地址的访问次数。然后,您可以按日期,页面和IP地址选择适当的子图。
答案 1 :(得分:2)
建议reset
的{{1}} BufferedReader
方法只有在文件大小小于缓冲区大小或者调用Thomas且读取时间足够大时才会起作用超前限制。
我建议您阅读该文件一次并更新每行的地图和月份数组。顺便说一句,你不需要扫描器来读取行,BufferedReader本身就有readLine
方法。
BufferedReader br = ...;
String line;
while (null != (line = br.readLine())) {
String ip = getIP(line);
String page = getPage(line);
int month = getMonth(line);
// update hashmaps and arrays
}