我在写入CSV文件时遇到了一些问题。该程序使用HTML Parser(HTML解析库)从网站获取数据,然后将这些数据保存到.csv文件中。
我遇到的问题是,它不会将所有数据写入文件(而是将其中的一部分截断),并且当有多个url时,它根本不会写入它们!
要使用该程序,首先输入一个目录,然后在顶部区域输入这些网址。
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=04541GEL2
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31359T8L5
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31395RGT9
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=57643LJU1
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31358RRC9
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31392V6H0
先点击“send info”按钮,再点击“start”按钮。程序应该会在底部的文本框中记录所有日志。
以下是该程序的代码:
package com.js.extract;
import java.io.*;
import java.util.*;
import javax.swing.*;
import java.awt.*;
import java.awt.event.*;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
/**
 * Swing window that collects URLs from a text area, downloads the text of each
 * page through HTML Parser's {@code StringBean}, cuts that text into labelled
 * sections and writes the sections to one file per URL.
 */
public class Extraction_GUI extends JFrame {
/**
* Serialization version identifier.
*/
private static final long serialVersionUID = 1L;
// Button whose listener (registered in the constructor) calls start(String).
protected JButton start;
// Button whose listener (infoListener) copies the text-area content into urls.
protected JButton sendInfo;
// Editable text area the URLs are read from.
protected JTextArea infoArea;
// Read-only text area used as the on-screen log; static so start(String) can append to it.
protected static JTextArea log;
// Scroll pane wrapping infoArea.
protected JScrollPane sp;
// Scroll pane wrapping log.
protected JScrollPane sp2;
// Vertical split holding the input panel above the log panel.
protected JSplitPane pane;
protected JPanel mainPanel;
protected JPanel aPanel;
protected JPanel lPanel;
// Not read or written anywhere else in this class.
protected int areaStatus = 0;
// Created once when the class is initialised; every timestamp logged below is read from this same instance.
static protected Calendar cal = Calendar.getInstance();
// URLs queued by infoListener and consumed, in order, by start(String).
protected static ArrayList<String> urls = new ArrayList<String>();
/**
 * Builds and shows the window, asks for the save location with an input dialog,
 * and registers the listeners of both buttons.
 */
public Extraction_GUI(){
super("Extraction by Jeel Shah");
setSize(660,520);
setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
setLocationRelativeTo(null);
setResizable(false);
mainPanel = new JPanel();
mainPanel.setLayout(new FlowLayout());
aPanel = new JPanel();
aPanel.setLayout(new FlowLayout());
lPanel = new JPanel();
lPanel.setLayout(new FlowLayout());
start = new JButton("start");
sendInfo = new JButton("send info");
// The input area is pre-filled with one sample URL.
infoArea = new JTextArea("http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=3133F0GM5"
,13,55);
log = new JTextArea(10,55);
log.setEditable(false);
sp = new JScrollPane(infoArea);
sp.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);
sp.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);
sp2 = new JScrollPane(log);
sp2.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);
sp2.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);
aPanel.add(sp);
lPanel.add(sp2);
pane = new JSplitPane(JSplitPane.VERTICAL_SPLIT,aPanel,lPanel);
mainPanel.add(pane);
mainPanel.add(start);
mainPanel.add(sendInfo);
add(mainPanel);
setVisible(true);
// The dialog result is used as-is as the path prefix in start(String); it is not checked for null or emptiness here.
final String toSave = JOptionPane.showInputDialog(null, "Please enter where you would like to save your files: ");
log.append("Data will be saved to: "+toSave+"\n");
sendInfo.addActionListener(new infoListener());
start.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent e) {
try {
start(toSave);
} catch (ParserException e1) {
// Failures are only printed to stderr; nothing is appended to the log area.
e1.printStackTrace();
} catch (IOException e2) {
e2.printStackTrace();
}
}
});
}
/**
 * Processes every queued URL: fetches the page text, slices it at a fixed list
 * of labels and writes the slices to a file named
 * {@code file + "CUSIP" + i + ".csv"}, where {@code i} is the URL's index.
 *
 * @param file prefix prepended to the output file name
 * @throws ParserException declared for the HTML Parser calls
 * @throws IOException if the output file cannot be opened or written
 */
public static void start(String file) throws ParserException, IOException {
for(int i = 0; i < urls.size();i++) {
// The path is built by plain concatenation; no separator is inserted between file and "CUSIP".
// The writer is opened before the page is fetched or checked.
BufferedWriter writer = new BufferedWriter(new FileWriter(file+"CUSIP"+i+".csv"));
StringBean sb = new StringBean ();
sb.setLinks(false);
sb.setReplaceNonBreakingSpaces(true);
sb.setCollapse(true);
sb.setURL (urls.get(i));
String toReduce = sb.getStrings ();
StringBuffer buffer = new StringBuffer(toReduce);
// Pages carrying this notice are only logged; no data is written for them.
if(toReduce.contains("*CUSIP Detail information will be available when this issue settles.")) {
log.append("CUSIP Detail not available.For "+urls.get(i)+"\n");
}else {
// Drop everything up to one character before "Cusip/ISIN".
buffer.delete(0, toReduce.indexOf("Cusip/ISIN")-1);
// Drop everything from "Underwriters:" to the end, so that section is never written.
buffer.delete(buffer.indexOf("Underwriters:"), buffer.length());
// Each entry runs from one label up to the next; every indexOf result is used directly as a substring bound.
String[] data = new String[13];
data[0] = buffer.substring(0, buffer.indexOf("Instrument Type:"));
data[1] = buffer.substring(buffer.indexOf("Instrument Type:"),buffer.indexOf("Call Type:"));
data[2] = buffer.substring(buffer.indexOf("Call Type:"),buffer.indexOf("Issue Date:"));
data[3] = buffer.substring(buffer.indexOf("Issue Date:"),buffer.indexOf("Issue Price:"));
data[4] = buffer.substring(buffer.indexOf("Issue Price:"),buffer.indexOf("Amount Issued:"));
data[5] = buffer.substring(buffer.indexOf("Amount Issued:"),buffer.indexOf("Lockout Period:"));
data[6] = buffer.substring(buffer.indexOf("Lockout Period:"),buffer.indexOf("Currency"));
data[7] = buffer.substring(buffer.indexOf("Currency"),buffer.indexOf("Denomination:"));
data[8] = buffer.substring(buffer.indexOf("Denomination:"),buffer.indexOf("First Payment"));
data[9] = buffer.substring(buffer.indexOf("First Payment"),buffer.indexOf("Maturity Date"));
data[10] = buffer.substring(buffer.indexOf("Maturity Date"),buffer.indexOf("Original Coupon:"));
data[11] = buffer.substring(buffer.indexOf("Original Coupon:"),buffer.indexOf("Current Coupon:"));
data[12] = buffer.substring(buffer.indexOf("Current Coupon:"),buffer.length());
// Each section is written followed by a comma and a line break.
for(String s : data) {
writer.write(s);
writer.write(",");
writer.newLine();
}
}
// Reached only when no exception was thrown above; there is no finally block.
writer.flush();
writer.close();
// Logged for every URL, including those that took the "not available" branch.
log.append("Harvested: "+urls.get(i)+" successfully"+" \n");
}
log.append("Completed at: "+cal.get(Calendar.HOUR)+":"+cal.get(Calendar.MINUTE)+":"+cal.get(Calendar.SECOND) + "\n");
}
/**
 * Listener of the "send info" button: appends the comma-separated tokens of the
 * input area to {@code urls} and logs a timestamp.
 */
class infoListener implements ActionListener{
public void actionPerformed(ActionEvent arg0) {
String url = infoArea.getText();
// "," is the only delimiter; line breaks in the text area stay inside the tokens.
StringTokenizer st = new StringTokenizer(url,",");
while(st.hasMoreTokens()) {
urls.add(st.nextToken());
}
log.append("Data Recieved at: "+cal.get(Calendar.HOUR)+":"+cal.get(Calendar.MINUTE)+":"+cal.get(Calendar.SECOND)+"\n");
}
}
/**
 * Entry point; creates the window on the Swing event dispatch thread.
 */
public static void main(String[]args) {
javax.swing.SwingUtilities.invokeLater(new Runnable() {
public void run() {
new Extraction_GUI();
}
});
}
}
注意:您需要下载HTML Parser并将其添加到构建路径中。
答案 0 :(得分:3)
**首先**
您应该在“CUSIP”之前的以下代码行中添加“/”:
BufferedWriter writer = new BufferedWriter(new FileWriter(file + "CUSIP" + i + ".csv"));
应该是
BufferedWriter writer = new BufferedWriter(new FileWriter(file + "/CUSIP" + i + ".csv"));
否则,您的程序找不到输出文件。
**第二**
您应该传递以“,”分隔的网址。否则您的程序无法正确解析输入。
所以输入应该是:
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=04541GEL2,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31359T8L5,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31395RGT9,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=57643LJU1,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31358RRC9,
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=31392V6H0
**第三**
您为输入指定的网址包含以下文字:
CUSIP: 31392V6H0*
*CUSIP Detail information will be available when this issue settles.
在这种情况下,您的程序不应将任何内容写入文件。
请参阅程序的以下部分:
if (toReduce.contains("*CUSIP Detail information will be available when this issue settles.")) {
log.append("CUSIP Detail not available.For " + urls.get(i) + "\n");
} else {
buffer.delete(0, toReduce.indexOf("Cusip/ISIN"));
buffer.delete(buffer.indexOf("Underwriters:"), buffer.length());
String[] data = new String[13];
data[0] = buffer.substring(0, buffer.indexOf("Instrument Type:"));
data[1] = buffer.substring(buffer.indexOf("Instrument Type:"), buffer.indexOf("Call Type:"));
data[2] = buffer.substring(buffer.indexOf("Call Type:"), buffer.indexOf("Issue Date:"));
data[3] = buffer.substring(buffer.indexOf("Issue Date:"), buffer.indexOf("Issue Price:"));
data[4] = buffer.substring(buffer.indexOf("Issue Price:"), buffer.indexOf("Amount Issued:"));
data[5] = buffer.substring(buffer.indexOf("Amount Issued:"), buffer.indexOf("Lockout Period:"));
data[6] = buffer.substring(buffer.indexOf("Lockout Period:"), buffer.indexOf("Currency"));
data[7] = buffer.substring(buffer.indexOf("Currency"), buffer.indexOf("Denomination:"));
data[8] = buffer.substring(buffer.indexOf("Denomination:"), buffer.indexOf("First Payment"));
data[9] = buffer.substring(buffer.indexOf("First Payment"), buffer.indexOf("Maturity Date"));
data[10] = buffer.substring(buffer.indexOf("Maturity Date"), buffer.indexOf("Original Coupon:"));
data[11] = buffer.substring(buffer.indexOf("Original Coupon:"), buffer.indexOf("Current Coupon:"));
data[12] = buffer.substring(buffer.indexOf("Current Coupon:"), buffer.length());
for (String s : data) {
writer.write(s);
writer.write(",");
writer.newLine();
}
}
**第四**
我已使用默认网址启动您的程序:
http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=3133F0GM5
它将所有数据写入了文件CUSIP0.csv。
这是输出:
Cusip/ISIN:
3133F0GM5
US3133F0GM57
,
Instrument Type:
Freddie Notes
,
Call Type:
American
,
Issue Date:
10/18/2001
,
Issue Price:
100.0
,
Amount Issued:
$20,809,000
,
Lockout Period:
2 Year(s)
,
Currency:
USD
,
Denomination:
$1,000
,
First Payment:
11/15/2001
,
Maturity Date:
10/15/2011
,
Original Coupon:
5.250%
,
Current Coupon:
5.250%
,
唯一截断的部分如下:
Underwriters: LASALLE FINANCIAL SERVICES, INC.
这是因为,你忘了解析它。
**结论:**
以下代码有效,但您可能需要将更多数据写入输出文件。 您应该考虑如何正确删除结尾。
package com.js.extract;
import java.io.*;
import java.util.*;
import javax.swing.*;
import java.awt.*;
import java.awt.event.*;
import org.htmlparser.beans.StringBean;
import org.htmlparser.util.ParserException;
/**
 * Swing window that collects URLs from a text area, downloads the text of each
 * page through HTML Parser's {@code StringBean}, cuts that text into labelled
 * sections and writes the sections to one file per URL.
 *
 * <p>Usage: enter the target directory in the dialog, put the URLs (separated
 * by commas and/or line breaks) in the upper text area, press "send info" and
 * then "start". Progress is reported in the lower text area.
 */
public class Extraction_GUI extends JFrame {

    private static final long serialVersionUID = 1L;

    /** Notice shown by the site instead of the detail table for unsettled issues. */
    private static final String NOT_SETTLED_NOTICE =
            "*CUSIP Detail information will be available when this issue settles.";

    /**
     * Section labels in the order they are expected on the page. Each output
     * section runs from one label up to the next one that is present.
     */
    private static final String[] FIELD_LABELS = {
        "Cusip/ISIN",
        "Instrument Type:",
        "Call Type:",
        "Issue Date:",
        "Issue Price:",
        "Amount Issued:",
        "Lockout Period:",
        "Currency",
        "Denomination:",
        "First Payment",
        "Maturity Date",
        "Original Coupon:",
        "Current Coupon:",
        "Underwriters:"
    };

    /** URLs may be separated by commas and/or any whitespace, including line breaks. */
    private static final String URL_DELIMITERS = ", \t\r\n";

    protected JButton start;
    protected JButton sendInfo;
    protected JTextArea infoArea;
    protected static JTextArea log;
    protected JScrollPane sp;
    protected JScrollPane sp2;
    protected JSplitPane pane;
    protected JPanel mainPanel;
    protected JPanel aPanel;
    protected JPanel lPanel;
    protected int areaStatus = 0;

    /**
     * Kept for source compatibility only. It is created once at class
     * initialisation and therefore always holds the same instant, so log
     * timestamps are taken from {@link #timestamp()} instead.
     */
    static protected Calendar cal = Calendar.getInstance();

    /** URLs queued by the "send info" button and processed by {@link #start(String)}. */
    protected static ArrayList<String> urls = new ArrayList<String>();

    /**
     * Builds and shows the window, asks for the save directory and wires the
     * two buttons.
     */
    public Extraction_GUI() {
        super("Extraction by Jeel Shah");
        setSize(660, 520);
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        setLocationRelativeTo(null);
        setResizable(false);

        mainPanel = new JPanel();
        mainPanel.setLayout(new FlowLayout());
        aPanel = new JPanel();
        aPanel.setLayout(new FlowLayout());
        lPanel = new JPanel();
        lPanel.setLayout(new FlowLayout());

        start = new JButton("start");
        sendInfo = new JButton("send info");
        infoArea = new JTextArea(
                "http://www.freddiemac.com/debt/data/cgi-bin/cusipdetail.cgi?cusip=3133F0GM5", 13, 55);
        log = new JTextArea(10, 55);
        log.setEditable(false);

        sp = new JScrollPane(infoArea);
        sp.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);
        sp.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);
        sp2 = new JScrollPane(log);
        sp2.setHorizontalScrollBarPolicy(JScrollPane.HORIZONTAL_SCROLLBAR_ALWAYS);
        sp2.setVerticalScrollBarPolicy(JScrollPane.VERTICAL_SCROLLBAR_ALWAYS);

        aPanel.add(sp);
        lPanel.add(sp2);
        pane = new JSplitPane(JSplitPane.VERTICAL_SPLIT, aPanel, lPanel);
        mainPanel.add(pane);
        mainPanel.add(start);
        mainPanel.add(sendInfo);
        add(mainPanel);
        setVisible(true);

        // May be null (dialog cancelled) or blank; start(String) falls back to the working directory.
        final String toSave = JOptionPane.showInputDialog(null,
                "Please enter where you would like to save your files: ");
        log.append("Data will be saved to: " + toSave + "\n");

        sendInfo.addActionListener(new infoListener());
        start.addActionListener(new ActionListener() {
            public void actionPerformed(ActionEvent e) {
                // NOTE(review): start() does blocking network and file I/O on the event
                // dispatch thread, so the window is unresponsive while it runs.
                try {
                    start(toSave);
                } catch (ParserException e1) {
                    e1.printStackTrace();
                    // Surface the failure in the UI as well, not only on stderr.
                    log.append("Failed to read page: " + e1.getMessage() + "\n");
                } catch (IOException e2) {
                    e2.printStackTrace();
                    log.append("Failed to write file: " + e2.getMessage() + "\n");
                }
            }
        });
    }

    /**
     * Processes every queued URL: fetches the page text, cuts it into labelled
     * sections and writes them to {@code CUSIP<index>.csv} inside the given
     * directory. Pages without detail information are logged and produce no file.
     *
     * @param file directory to write into; {@code null} or blank means the
     *             current working directory
     * @throws ParserException if HTML Parser fails to read a page
     * @throws IOException if an output file cannot be created or written
     */
    public static void start(String file) throws ParserException, IOException {
        // new File("", child) would resolve against the system default directory,
        // so map a missing directory to "." explicitly.
        String directory = (file == null || file.trim().length() == 0) ? "." : file;

        for (int i = 0; i < urls.size(); i++) {
            String url = urls.get(i);

            StringBean sb = new StringBean();
            sb.setLinks(false);
            sb.setReplaceNonBreakingSpaces(true);
            sb.setCollapse(true);
            sb.setURL(url);
            String pageText = sb.getStrings();

            if (pageText == null || pageText.contains(NOT_SETTLED_NOTICE)) {
                log.append("CUSIP Detail not available.For " + url + "\n");
                continue;
            }

            String[] data = splitFields(pageText);
            if (data.length == 0) {
                log.append("No CUSIP detail found at " + url + "\n");
                continue;
            }

            // The file is opened only once there is something to write, so pages
            // without data no longer leave empty files behind.
            File target = new File(directory, "CUSIP" + i + ".csv");
            BufferedWriter writer = new BufferedWriter(new FileWriter(target));
            try {
                // Output format is unchanged: section text, a comma, a line break.
                // NOTE(review): values such as "$20,809,000" contain commas themselves,
                // so this is not strictly parseable CSV.
                for (String field : data) {
                    writer.write(field);
                    writer.write(",");
                    writer.newLine();
                }
            } finally {
                // close() flushes; running it in finally releases the file on failure too.
                writer.close();
            }
            log.append("Harvested: " + url + " successfully" + " \n");
        }
        log.append("Completed at: " + timestamp() + "\n");
    }

    /**
     * Cuts the page text into sections, one per label of {@link #FIELD_LABELS}
     * found in the text. A label that is missing is skipped, and the preceding
     * section then extends to the next label that is present.
     *
     * @param text full text of the page
     * @return the sections in page order; empty if the first label is absent
     */
    private static String[] splitFields(String text) {
        int from = text.indexOf(FIELD_LABELS[0]);
        if (from < 0) {
            return new String[0];
        }
        ArrayList<String> fields = new ArrayList<String>();
        for (int i = 1; i < FIELD_LABELS.length; i++) {
            int next = text.indexOf(FIELD_LABELS[i], from);
            if (next < 0) {
                continue;
            }
            fields.add(text.substring(from, next));
            from = next;
        }
        // NOTE(review): the last section runs to the end of the page text, so it
        // includes whatever follows the final label on the page.
        fields.add(text.substring(from));
        return fields.toArray(new String[fields.size()]);
    }

    /** Returns the current time as {@code hour:minute:second}, read at call time. */
    private static String timestamp() {
        Calendar now = Calendar.getInstance();
        return now.get(Calendar.HOUR) + ":" + now.get(Calendar.MINUTE) + ":" + now.get(Calendar.SECOND);
    }

    /**
     * Listener of the "send info" button: appends every URL found in the input
     * area to {@code urls} and logs when that happened.
     */
    class infoListener implements ActionListener {
        public void actionPerformed(ActionEvent arg0) {
            // Splitting on whitespace as well as commas accepts one URL per line
            // and strips the line breaks that follow a comma.
            StringTokenizer st = new StringTokenizer(infoArea.getText(), URL_DELIMITERS);
            while (st.hasMoreTokens()) {
                urls.add(st.nextToken());
            }
            log.append("Data Recieved at: " + timestamp() + "\n");
        }
    }

    /** Entry point; creates the window on the Swing event dispatch thread. */
    public static void main(String[] args) {
        javax.swing.SwingUtilities.invokeLater(new Runnable() {
            public void run() {
                new Extraction_GUI();
            }
        });
    }
}