我正在尝试使用SAX Parser来解析XML文件。虽然我在代码中一直收到以下错误:
Exception in thread "main" java.net.MalformedURLException: unknown protocol: c
at java.net.URL.<init>(URL.java:592)
at java.net.URL.<init>(URL.java:482)
at java.net.URL.<init>(URL.java:431)
at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(XMLEntityManager.java:605)
at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(XMLVersionDetector.java:189)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:799)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:764)
at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:123)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1137)
at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:580)
at main.main(main.java:28)
Java Result: 1
这是我的处理程序类:
public class MySaxParser extends DefaultHandler {
Index i = new Index(12);
String bookxmlfilename;
String tmpValue;
BookCitation c;
JournalArticle j;
Unpublished u;
ConfProceedings p;
public MySaxParser() {
}
@Override
public void startElement(String s, String s1, String elementName, Attributes attr) throws SAXException {
if (elementName.equalsIgnoreCase("JournalArticle")) {
if (elementName.equalsIgnoreCase("Pages")) {
j.setstartPage(Integer.parseInt(attr.getValue("StartPage")));
j.setendPage(Integer.parseInt(attr.getValue("EndPage")));
}
}
if (elementName.equalsIgnoreCase("ConferenceProceedings")) {
if (elementName.equalsIgnoreCase("Pages")) {
p.setstartPage(Integer.parseInt(attr.getValue("StartPage")));
p.setendPage(Integer.parseInt(attr.getValue("EndPage")));
}
}
}
@Override
public void endElement(String s, String s1, String element) throws SAXException {
if (element.equalsIgnoreCase("book")) {
i.addCitation(c);
if (element.equalsIgnoreCase("name")) {
c.setName(tmpValue);
}
if (element.equalsIgnoreCase("publisher")) {
c.setpublisher(tmpValue);
}
if (element.equalsIgnoreCase("publicationDate")) {
c.setdateOfPublication(tmpValue);
}
if (element.equalsIgnoreCase("authors")) {
if (element.equalsIgnoreCase("author")) {
c.addAuthor(tmpValue);
}
}
if (element.equalsIgnoreCase("keywords")) {
if (element.equalsIgnoreCase("keyword")) {
c.addKeyword(tmpValue);
}
}
}
if (element.equalsIgnoreCase("JournalArticle")) {
i.addCitation(j);
if (element.equalsIgnoreCase("name")) {
j.setName(tmpValue);
}
if (element.equalsIgnoreCase("TitleOfJournal")) {
j.settitleOfJournal(tmpValue);
}
if (element.equalsIgnoreCase("TitleOfJournal")) {
j.settitleOfJournal(tmpValue);
}
if (element.equalsIgnoreCase("PublicationDate")) {
j.setpublicationDate(tmpValue);
}
if (element.equalsIgnoreCase("volNumber")) {
j.setvolNumber(Integer.parseInt(tmpValue));
}
if (element.equalsIgnoreCase("IssueNumber")) {
j.setissueNumber(Integer.parseInt(tmpValue));
}
if (element.equalsIgnoreCase("authors")) {
if (element.equalsIgnoreCase("author")) {
j.addAuthor(tmpValue);
}
if (element.equalsIgnoreCase("keywords")) {
if (element.equalsIgnoreCase("keyword")) {
j.addKeyword(tmpValue);
}
}
}
}
if (element.equalsIgnoreCase("Unpublished")) {
i.addCitation(u);
if (element.equalsIgnoreCase("name")) {
u.setName(tmpValue);
}
if (element.equalsIgnoreCase("authors")) {
if (element.equalsIgnoreCase("author")) {
u.addAuthor(tmpValue);
}
if (element.equalsIgnoreCase("keywords")) {
if (element.equalsIgnoreCase("keyword")) {
u.addKeyword(tmpValue);
}
}
}
}
if (element.equalsIgnoreCase("ConferenceProceedings")) {
i.addCitation(p);
if (element.equalsIgnoreCase("name")) {
p.setName(tmpValue);
}
if (element.equalsIgnoreCase("publisher")) {
p.setpublisher(tmpValue);
}
if (element.equalsIgnoreCase("ConferenceLocation")) {
p.setlocationOfConference(tmpValue);
}
if (element.equalsIgnoreCase("TitleOfConferenceproceeding")) {
p.settitleOfConferenceProc(tmpValue);
}
if (element.equalsIgnoreCase("ConferenceYea")) {
p.setconfYear(Integer.parseInt(tmpValue));
}
if (element.equalsIgnoreCase("Editor")) {
p.seteditor(tmpValue);
}
if (element.equalsIgnoreCase("authors")) {
if (element.equalsIgnoreCase("author")) {
p.addAuthor(tmpValue);
}
if (element.equalsIgnoreCase("keywords")) {
if (element.equalsIgnoreCase("keyword")) {
p.addKeyword(tmpValue);
}
}
}
}
if (element.equalsIgnoreCase("FormattingStyle")) {
i.setFormatType("IEEE");
}
if (element.equalsIgnoreCase("FormattingStyle")) {
try {
i.formatIEEE(tmpValue);
} catch (IOException ex) {
Logger.getLogger(MySaxParser.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
@Override
public void characters(char[] ac, int i, int j) throws SAXException {
tmpValue = new String(ac, i, j);
}
}
这是我的主要课程:
public class main {
public static void main(String[] args) throws IOException, ParserConfigurationException, SAXException {
// Create scanner
Scanner OswegoNote = new Scanner(System.in);
//Create a parser factory
SAXParserFactory factory = SAXParserFactory.newInstance();
//Make the parser
SAXParser saxParser = factory.newSAXParser();
XMLReader parser = saxParser.getXMLReader();
//Create a handler
MySaxParser handler = new MySaxParser();
//Tell the parser to use this handler
parser.setContentHandler(handler);
//read and parse the document
parser.parse("C:\\Users\\mhromalik\\Documents\\Suny Oswego\\fall2013\\csc241fall2012\\Assignment\\MyCitation.html");
}
}
这是我的XML文件的一部分:
<Citation>
<ConferenceProceedings>
<Name>An efficient implementation of Smith Waterman algorithm on GPU using CUDA, for massively parallel scanning of sequence databases</Name>
<Publisher>Parallel and Distributed Processing</Publisher>
<ConferenceLocation>Austin,TX</ConferenceLocation>
<TitleOfConferenceproceeding> IEEE International Conference on Parallel and Distributed Processing</TitleOfConferenceproceeding>
<ConferenceYear>2009</ConferenceYear>
<Editor>S. M. Mann</Editor>
<Pages StartPage="85" EndPage="102"/>
<Authors>
<author>L. L. Ligowski</author>
<author>W. A. Rudnicki</author>
</Authors>
<Keywords>
<Keyword>Sparse Data</Keyword>
<Keyword>DNA</Keyword>
<Keyword>GPU</Keyword>
<Keyword>Data Mining</Keyword>
</Keywords>
</ConferenceProceedings>
</Citation>
<FormattingStyle>IEEE</FormattingStyle>
<FilePath>C:\\Users\\mhromalik\\Documents\\Suny Oswego\\fall2013\\csc241fall2012\\Assignment\\MyCitation.html</FilePath>
</Index>
我无法弄清楚为什么会发生这种错误。任何帮助将不胜感激!
答案 0 :(得分:11)
设置html文件的路径时,您缺少协议。当您尝试读取本地html文件时,可以使用file
协议:
file:///{yourfilepath}
答案 1 :(得分:3)
parser.parse()需要一个URI,而不是文件名。您可以使用
从Java中获取文件名到URInew File(filename).toURI()