如果一个Sitemap中的URL数量超过maxURLs,我想把它拆分成多个Sitemap。
在下面的示例中,只要Sitemap包含多个网址,就应该对该站点地图进行拆分。
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.*;
import org.w3c.dom.CharacterData;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Splits a sitemap XML document into several smaller sitemaps, each holding at most a
 * given number of {@code <url>} entries.
 *
 * <p>Every generated sitemap is produced by copying DOM nodes into a fresh document and
 * serializing that document, so the complete {@code <url>} subtree (element, children and
 * text) ends up in the output and the root element is always closed.
 */
public class SiteMapSplitter {

    /** XML declaration written in front of every generated sitemap. */
    private static final String XML_DECLARATION =
            "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n";

    /** Maximum number of url entries per sitemap used by {@link #splitSitemap(String)}. */
    private static final int MAX_URLS = 1;

    /**
     * Demo entry point: splits a hard-coded two-entry sitemap and prints the pieces.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String sitemapStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
                "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n" +
                "<url>\n" +
                "<loc>test1.html</loc>\n" +
                "<lastmod>today</lastmod>\n" +
                "<changefreq>daily</changefreq>\n" +
                "<priority>1.0</priority>\n" +
                "</url>\n" +
                "<url>\n" +
                "<loc>test2.html</loc>\n" +
                "<lastmod>yesterday</lastmod>\n" +
                "<changefreq>daily</changefreq>\n" +
                "<priority>1.0</priority>\n" +
                "</url></urlset>";
        try {
            splitSitemap(sitemapStr);
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the sitemaps obtained by splitting {@code sitemapStr} into pieces of at most
     * {@link #MAX_URLS} entries. Nothing is printed when the sitemap does not exceed the
     * limit, i.e. when no split is necessary.
     *
     * @param sitemapStr the sitemap XML to split
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws IllegalArgumentException if {@code sitemapStr} is not well-formed XML
     */
    static private void splitSitemap(String sitemapStr) throws ParserConfigurationException {
        List<String> sitemaps = splitIntoSitemaps(sitemapStr, MAX_URLS);
        // More than one piece means the url count exceeded the limit.
        if (sitemaps.size() > 1) {
            for (String sitemap : sitemaps) {
                System.out.println(sitemap);
            }
        }
    }

    /**
     * Splits a sitemap into consecutive pieces of at most {@code maxUrls} entries, keeping
     * the entries in document order.
     *
     * <p>The root element of every piece is a shallow copy of the source root, so its name
     * and its attributes (including namespace declarations) are preserved.
     *
     * @param sitemapStr the sitemap XML to split
     * @param maxUrls maximum number of url entries per piece; must be at least 1
     * @return the serialized pieces in document order; empty if the sitemap has no url entry
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws IllegalArgumentException if {@code maxUrls < 1} or the input cannot be parsed
     */
    static List<String> splitIntoSitemaps(String sitemapStr, int maxUrls)
            throws ParserConfigurationException {
        if (maxUrls < 1) {
            throw new IllegalArgumentException("maxUrls must be at least 1 but was " + maxUrls);
        }
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Namespace awareness keeps the sitemap namespace attached to the copied elements.
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();

        Element sourceRoot = parse(builder, sitemapStr).getDocumentElement();
        List<Element> urls = childElements(sourceRoot, "url");
        Transformer serializer = newSerializer();

        List<String> sitemaps = new ArrayList<String>();
        int start = 0;
        while (start < urls.size()) {
            // Computed from the remaining count so that a huge maxUrls cannot overflow.
            int end = start + Math.min(maxUrls, urls.size() - start);
            Document piece = builder.newDocument();
            Element pieceRoot = (Element) piece.importNode(sourceRoot, false);
            piece.appendChild(pieceRoot);
            pieceRoot.appendChild(piece.createTextNode("\n"));
            for (Element url : urls.subList(start, end)) {
                // Deep import copies the url element together with all of its children.
                pieceRoot.appendChild(piece.importNode(url, true));
                pieceRoot.appendChild(piece.createTextNode("\n"));
            }
            sitemaps.add(XML_DECLARATION + serialize(serializer, piece));
            start = end;
        }
        return sitemaps;
    }

    /** Parses {@code xml}, reporting malformed or unreadable input as IllegalArgumentException. */
    private static Document parse(DocumentBuilder builder, String xml) {
        InputSource input = new InputSource();
        input.setCharacterStream(new StringReader(xml));
        try {
            return builder.parse(input);
        } catch (SAXException e) {
            throw new IllegalArgumentException("Sitemap is not well-formed XML", e);
        } catch (IOException e) {
            throw new IllegalArgumentException("Sitemap could not be read", e);
        }
    }

    /** Returns the direct child elements of {@code parent} whose local name is {@code localName}. */
    private static List<Element> childElements(Element parent, String localName) {
        List<Element> matches = new ArrayList<Element>();
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE && localName.equals(child.getLocalName())) {
                matches.add((Element) child);
            }
        }
        return matches;
    }

    /** Creates an identity transformer that leaves out the XML declaration (it is added separately). */
    private static Transformer newSerializer() {
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            return transformer;
        } catch (TransformerException e) {
            throw new IllegalStateException("Could not create an XML serializer", e);
        }
    }

    /** Serializes {@code document} to a string using {@code serializer}. */
    private static String serialize(Transformer serializer, Document document) {
        StringWriter out = new StringWriter();
        try {
            serializer.transform(new DOMSource(document), new StreamResult(out));
        } catch (TransformerException e) {
            throw new IllegalStateException("Could not serialize sitemap", e);
        }
        return out.toString();
    }
}
问题在于:Element element = (Element) nodes.item(i); smURLsBuilder.append(element);
这段代码不会把整个元素(在本例中为url元素及其子元素)追加到smURLsBuilder中。
应该怎样做才能实现这一点?
答案 0 :(得分:0)
您应该考虑用面向对象的方式来处理站点地图:可以使用数据绑定(JAXB),或者使用代码更简短的数据投影(data projection)(披露:我与该项目有关)。这样就不需要通过字符串拼接来创建XML。
/**
 * Demo that emits one sitemap per url entry of a source sitemap by going through an
 * XML projection instead of concatenating strings.
 */
public class SitemapSplitter {

    /** Source sitemap with two url entries that the demo splits. */
    static String sitemapStr = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" +
            "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n" +
            "<url>\n" +
            "<loc>test1.html</loc>\n" +
            "<lastmod>today</lastmod>\n" +
            "<changefreq>daily</changefreq>\n" +
            "<priority>1.0</priority>\n" +
            "</url>\n" +
            "<url>\n" +
            "<loc>test2.html</loc>\n" +
            "<lastmod>yesterday</lastmod>\n" +
            "<changefreq>daily</changefreq>\n" +
            "<priority>1.0</priority>\n" +
            "</url></urlset>";

    /** Projection interface through which the url entries of a sitemap are written. */
    public interface Sitemap {
        /**
         * Writes the given nodes to the path {@code /urlset/url}.
         *
         * @param urls the url nodes to write
         * @return a sitemap projection, which allows call chaining
         */
        @XBWrite("/urlset/url")
        Sitemap setUrls(List<? extends Node> urls);
    }

    /**
     * Reads every url node of {@link #sitemapStr} and prints one sitemap per node.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        // NOTE(review): the flag name suggests toString() of a projection renders its XML,
        // which is what the println below relies on - confirm against the projector docs.
        XBProjector projector = new XBProjector(Flags.TO_STRING_RENDERS_XML);

        // Collect all url nodes of the existing sitemap.
        List<Node> sourceUrls = projector.onXMLString(sitemapStr)
                .evalXPath("/xbdefaultns:urlset/xbdefaultns:url")
                .asListOf(Node.class);

        for (int index = 0; index < sourceUrls.size(); index++) {
            List<Node> singleUrl = Collections.singletonList(sourceUrls.get(index));
            // Start from a fresh projection of the source for every output sitemap,
            // then write exactly one url into it.
            Sitemap projection = projector.onXMLString(sitemapStr).createProjection(Sitemap.class);
            Sitemap singleUrlSitemap = projection.setUrls(singleUrl);
            System.out.println(singleUrlSitemap);
        }
    }
}
该程序的输出如下:
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>test1.html</loc>
<lastmod>today</lastmod>
<changefreq>daily</changefreq>
<priority>1.0</priority>
</url>
</urlset>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>test2.html</loc>
<lastmod>yesterday</lastmod>
<changefreq>daily</changefreq>
<priority>1.0</priority>
</url>
</urlset>