作为一个课程项目，我正在尝试为我的Java课编写一个简单的RSS阅读器。尽管我知道有更好、更高效的方法和工具，但为了积累经验，我仍想通过遍历DOM树来实现。我有一个ReaderObject，它保存频道的基本标题(title)、链接(link)、描述(description)，以及一个用于存放RSSItem对象的列表；RSSItem的实例变量包括标题、链接、描述、发布日期(pubDate)和guid。我希望能把这些信息解析出来，并以清晰的方式重新显示。我卡在了RSSItem部分，因为在那里取到的文本是空白的。我也不确定这种做法是否可行，或者我是否真正理解了其中的原理...
另一个问题是：调用getChildNodes得到子节点列表，再用for循环逐个取出子节点之后，为什么还需要对每个子节点调用getFirstChild？这种写法是我从书上的示例里照搬的，但我不明白其中的原因。
这是我的代码:
Code:
import java.io.*;
import java.util.*;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
/**
 * Breadth-first DOM walker for an RSS file.
 *
 * <p>Each element is handled twice: once as a child of its parent, where the text of its
 * first child node is inspected and possibly stored, and once when it reaches the head of
 * the work queue, where its tag name and attributes are printed.
 */
public class RSSReader {

    /** Receives the title, link and description texts found while walking the tree. */
    private ReaderObject obj;

    /**
     * Entry point: walks {@code testrss.xml} from the working directory if it is a regular
     * file; otherwise does nothing.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File source = new File("testrss.xml");
        if (!source.isFile()) {
            return;
        }
        System.out.println("is File");
        new RSSReader(source);
    }

    /**
     * Parses {@code xmlFile} into a DOM document and walks it level by level, printing what
     * it finds. Parser, I/O and illegal-argument failures are reported with a stack trace
     * and otherwise ignored.
     *
     * @param xmlFile the XML file to parse
     */
    public RSSReader(File xmlFile) {
        try {
            obj = new ReaderObject();
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = builder.parse(xmlFile); // Document extends Node

            // FIFO work queue: the head is processed, its element children go to the tail.
            Deque<Node> pending = new ArrayDeque<Node>();
            pending.add(doc);
            while (!pending.isEmpty()) {
                Node current = pending.peek();
                enqueueChildElements(current, pending);
                if (current instanceof Element) {
                    describeElement((Element) current);
                }
                pending.remove();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IllegalArgumentException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        }
    }

    /** Appends every direct child of {@code parent} that is an element to {@code queue}. */
    private void enqueueChildElements(Node parent, Deque<Node> queue) {
        NodeList children = parent.getChildNodes();
        for (int k = 0; k < children.getLength(); k++) {
            Node candidate = children.item(k);
            if (candidate instanceof Element) {
                queue.add((Element) candidate);
            }
        }
    }

    /** Prints the tag name and attributes of {@code element}, then inspects its element children. */
    private void describeElement(Element element) {
        System.out.println("Element Node: " + element.getTagName());

        if (element.hasAttributes()) {
            NamedNodeMap attributes = element.getAttributes();
            for (int k = 0; k < attributes.getLength(); k++) {
                Attr attr = (Attr) attributes.item(k);
                System.out.print("\tAttribute Key: " + attr.getName() + " Value: " + attr.getValue());
            }
            // Terminates the single line that all attributes were printed on.
            System.out.println();
        }

        NodeList children = element.getChildNodes();
        for (int k = 0; k < children.getLength(); k++) {
            Node child = children.item(k);
            if (child instanceof Element) {
                recordChild((Element) child);
            }
        }
    }

    /**
     * Reads the text of the first child node of {@code child} and, depending on the tag
     * name, stores and prints it. The parent of {@code child} is not taken into account,
     * so each matching tag overwrites the previously stored value.
     */
    private void recordChild(Element child) {
        // The first child is assumed to be a text node: the cast fails for any other node
        // type, and an element without children yields null here.
        Text firstText = (Text) child.getFirstChild();
        String text = firstText.getData().trim();
        String tag = child.getTagName().toLowerCase();

        if ("title".equals(tag)) {
            obj.setTitle(text);
            System.out.println("Title: " + obj.getTitle());
        } else if ("link".equals(tag)) {
            obj.setLink(text);
            System.out.println("Link: " + obj.getLink());
        } else if ("description".equals(tag)) {
            obj.setDescription(text);
            System.out.println("Description: " + obj.getDescription());
        } else if ("item".equals(tag)) {
            // The first child of an <item> is whatever precedes its first nested element,
            // presumably only whitespace in an indented feed, which trims to "".
            RSSItem item = new RSSItem();
            System.out.println("item text: " + text);
            item.setTitle(text);
            System.out.println("RSS Item title: " + item.getTitle());
        }
    }
}
/**
 * Mutable holder for the title, link and description of a feed, plus the list of
 * {@link RSSItem} objects that belong to it.
 */
class ReaderObject {

    private String title;
    private String link;
    private String description;
    private List<RSSItem> items = new ArrayList<RSSItem>();

    /** Creates a holder whose title, link and description are empty strings. */
    public ReaderObject() {
        this.title = "";
        this.link = "";
        this.description = "";
    }

    /**
     * Creates a holder with the given values and no items.
     *
     * @param title       the title text
     * @param link        the link text
     * @param description the description text
     */
    public ReaderObject(String title, String link, String description) {
        this.title = title;
        this.link = link;
        this.description = description;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public void setLink(String link) {
        this.link = link;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public String getTitle() {
        return title;
    }

    public String getLink() {
        return link;
    }

    public String getDescription() {
        return description;
    }

    /**
     * Appends an item to this holder. Without this method the item list could never be
     * filled.
     *
     * @param item the item to add
     */
    public void addItem(RSSItem item) {
        items.add(item);
    }

    /**
     * Returns the items added so far.
     *
     * @return a new list containing the items in insertion order; changing it does not
     *         affect this holder
     */
    public List<RSSItem> getItems() {
        return new ArrayList<RSSItem>(items);
    }
}
/**
 * Mutable holder for the fields of one feed item: title, link, description, publication
 * date and guid, all kept as plain strings.
 */
class RSSItem {

    private String title;
    private String link;
    private String description;
    private String pubDate;
    private String guid;

    /** Creates an item whose fields are all empty strings. */
    public RSSItem() {
        this.title = "";
        this.link = "";
        this.description = "";
        this.pubDate = "";
        this.guid = "";
    }

    /**
     * Creates an item with the given field values.
     *
     * @param title       the title text
     * @param link        the link text
     * @param description the description text
     * @param item        not used; kept so existing callers keep compiling
     * @param pubDate     the publication date text
     * @param guid        the guid text
     */
    public RSSItem(String title, String link, String description, String item, String pubDate, String guid) {
        this.title = title;
        this.link = link;
        this.description = description;
        this.pubDate = pubDate;
        this.guid = guid;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public void setLink(String link) {
        this.link = link;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public void setPubDate(String pubDate) {
        this.pubDate = pubDate;
    }

    public void setGuid(String guid) {
        this.guid = guid;
    }

    public String getTitle() {
        return title;
    }

    // The getters below make the remaining fields readable; previously they could only
    // be written.

    public String getLink() {
        return link;
    }

    public String getDescription() {
        return description;
    }

    public String getPubDate() {
        return pubDate;
    }

    public String getGuid() {
        return guid;
    }
}
Output:
is File
Element Node: rss
Attribute Key: version Value: 2.0
Element Node: channel
Title: Liftoff News
Link: http://liftoff.msfc.nasa.gov/
Description: Liftoff to Space Exploration.
item text:
RSS Item title:
item text:
RSS Item title:
item text:
RSS Item title:
item text:
RSS Item title:
Element Node: title
Element Node: link
Element Node: description
Element Node: language
Element Node: pubDate
Element Node: lastBuildDate
Element Node: docs
Element Node: generator
Element Node: managingEditor
Element Node: webMaster
Element Node: item
Title: Star City
Link: http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp
Description: How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.
Element Node: item
Description: Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.
Element Node: item
Title: The Engine That Does More
Link: http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp
Description: Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.
Element Node: item
Title: Astronauts' Dirty Laundry
Link: http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp
Description: Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.
Element Node: title
Element Node: link
Element Node: description
Element Node: pubDate
Element Node: guid
Element Node: description
Element Node: pubDate
Element Node: guid
Element Node: title
Element Node: link
Element Node: description
Element Node: pubDate
Element Node: guid
Element Node: title
Element Node: link
Element Node: description
Element Node: pubDate
Element Node: guid
XML Code:
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>Liftoff News</title>
<link>http://liftoff.msfc.nasa.gov/</link>
<description>Liftoff to Space Exploration.</description>
<language>en-us</language>
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Weblog Editor 2.0</generator>
<managingEditor>editor@example.com</managingEditor>
<webMaster>webmaster@example.com</webMaster>
<item>
<title>Star City</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
</item>
<item>
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
</item> <item> <title>The Engine That Does More</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
</item> <item> <title>Astronauts' Dirty Laundry</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description> <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
</item>
</channel>
</rss>
最佳答案
您的方法的问题在于，在检查每个元素时没有考虑它在层次结构中的位置。例如，当您遇到一个<title>元素时，它既可能是频道(channel)的标题，也可能是某个项目(item)的标题。
我认为处理DOM树的最佳方法是自上而下的算法:
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
/**
 * Minimal RSS reader that walks the DOM tree top-down: the {@code rss} root, then each
 * {@code channel}, then each {@code item} inside that channel. Because every level is
 * read by its own method, a {@code title} inside a channel is never confused with a
 * {@code title} inside an item.
 */
public class RSSReader {

    /**
     * Reads {@code testrss.xml} from the working directory, if it is a regular file, and
     * prints every channel and item found in it. Any failure is reported with a stack
     * trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            File f = new File("testrss.xml");
            if (f.isFile()) {
                System.out.println("is File");
                List<RSSChannel> channels = read(f);
                for (RSSChannel channel : channels) {
                    System.out.println("Channel: ");
                    System.out.println(" title: " + channel.getTitle());
                    System.out.println(" link: " + channel.getLink());
                    System.out.println(" description: " + channel.getDescription());
                    for (RSSItem item : channel.getItems()) {
                        System.out.println(" Item: ");
                        System.out.println(" title: " + item.getTitle());
                        System.out.println(" link: " + item.getLink());
                        System.out.println(" description: " + item.getDescription());
                        System.out.println(" pubDate: " + item.getPubDate());
                        System.out.println(" guid: " + item.getGuid());
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses an RSS file and returns the channels it contains.
     *
     * @param xmlFile the file to parse
     * @return one {@link RSSChannel} per {@code channel} element directly below the root,
     *         in document order; empty if there is none
     * @throws ParserConfigurationException if no DOM parser can be created
     * @throws SAXException                 if the file is not well-formed XML
     * @throws IOException                  if the file cannot be read, or if the root
     *                                      element is not {@code rss}
     */
    public static List<RSSChannel> read(File xmlFile)
            throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(xmlFile); // Document extends Node
        Element root = doc.getDocumentElement();
        if (!root.getTagName().equalsIgnoreCase("rss")) {
            throw new IOException("Invalid RSS document");
        }
        return readChannels(root.getChildNodes());
    }

    /** Builds a channel from every {@code channel} element in {@code nodes}; other nodes are skipped. */
    private static List<RSSChannel> readChannels(NodeList nodes) {
        List<RSSChannel> result = new ArrayList<RSSChannel>();
        for (int i = 0; i < nodes.getLength(); ++i) {
            Node node = nodes.item(i);
            if (node instanceof Element) {
                Element elem = (Element) node;
                if (elem.getTagName().equalsIgnoreCase("channel")) {
                    result.add(readChannel(elem.getChildNodes()));
                }
            }
        }
        return result;
    }

    /**
     * Builds one channel from the child nodes of a {@code channel} element. Only
     * {@code title}, {@code link}, {@code description} and {@code item} children are used.
     */
    private static RSSChannel readChannel(NodeList nodes) {
        RSSChannel channel = new RSSChannel();
        for (int i = 0; i < nodes.getLength(); ++i) {
            Node node = nodes.item(i);
            if (!(node instanceof Element)) {
                continue; // whitespace text, comments, ...
            }
            Element elem = (Element) node;
            String tag = elem.getTagName();
            if (tag.equalsIgnoreCase("title")) {
                channel.setTitle(getText(elem));
            } else if (tag.equalsIgnoreCase("link")) {
                channel.setLink(getText(elem));
            } else if (tag.equalsIgnoreCase("description")) {
                channel.setDescription(getText(elem));
            } else if (tag.equalsIgnoreCase("item")) {
                channel.addItem(readItem(elem.getChildNodes()));
            }
        }
        return channel;
    }

    /**
     * Builds one item from the child nodes of an {@code item} element. Only {@code title},
     * {@code link}, {@code description}, {@code pubDate} and {@code guid} children are used.
     */
    private static RSSItem readItem(NodeList nodes) {
        RSSItem item = new RSSItem();
        for (int i = 0; i < nodes.getLength(); ++i) {
            Node node = nodes.item(i);
            if (!(node instanceof Element)) {
                continue; // whitespace text, comments, ...
            }
            Element elem = (Element) node;
            String tag = elem.getTagName();
            if (tag.equalsIgnoreCase("title")) {
                item.setTitle(getText(elem));
            } else if (tag.equalsIgnoreCase("link")) {
                item.setLink(getText(elem));
            } else if (tag.equalsIgnoreCase("description")) {
                item.setDescription(getText(elem));
            } else if (tag.equalsIgnoreCase("pubDate")) {
                item.setPubDate(getText(elem));
            } else if (tag.equalsIgnoreCase("guid")) {
                item.setGuid(getText(elem));
            }
        }
        return item;
    }

    /**
     * Returns the trimmed text of an element.
     *
     * <p>The whole text content is used rather than only the first child node. Looking at
     * the first child alone gave {@code null} for an empty element or one that starts with
     * a non-text node, and dropped everything after the first nested node.
     *
     * @param elm the element to read
     * @return the element's text with surrounding whitespace removed; never {@code null}
     */
    private static String getText(Element elm) {
        String content = elm.getTextContent();
        return content == null ? "" : content.trim();
    }

    /** Mutable holder for a channel's title, link, description and items. */
    public static class RSSChannel {

        private String title;
        private String link;
        private String description;
        private List<RSSItem> items = new ArrayList<RSSItem>();

        /** Creates a channel whose title, link and description are empty strings. */
        public RSSChannel() {
            this.title = "";
            this.link = "";
            this.description = "";
        }

        /**
         * Creates a channel with the given values and no items.
         *
         * @param title       the title text
         * @param link        the link text
         * @param description the description text
         */
        public RSSChannel(String title, String link, String description) {
            this.title = title;
            this.link = link;
            this.description = description;
        }

        public void setTitle(String title) {
            this.title = title;
        }

        public void setLink(String link) {
            this.link = link;
        }

        public void setDescription(String description) {
            this.description = description;
        }

        public String getTitle() {
            return title;
        }

        public String getLink() {
            return link;
        }

        public String getDescription() {
            return description;
        }

        /**
         * Returns the items added so far.
         *
         * @return a new list with the items in insertion order; changing it does not
         *         affect this channel
         */
        public List<RSSItem> getItems() {
            return new ArrayList<RSSItem>(items);
        }

        /**
         * Appends an item to this channel.
         *
         * @param item the item to add
         */
        public void addItem(RSSItem item) {
            items.add(item);
        }
    }

    /** Mutable holder for an item's title, link, description, publication date and guid. */
    public static class RSSItem {

        private String title;
        private String link;
        private String description;
        private String pubDate;
        private String guid;

        /** Creates an item whose fields are all empty strings. */
        public RSSItem() {
            this.title = "";
            this.link = "";
            this.description = "";
            this.pubDate = "";
            this.guid = "";
        }

        /**
         * Creates an item with the given field values.
         *
         * @param title       the title text
         * @param link        the link text
         * @param description the description text
         * @param item        not used; kept so existing callers keep compiling
         * @param pubDate     the publication date text
         * @param guid        the guid text
         */
        public RSSItem(String title, String link, String description, String item, String pubDate, String guid) {
            this.title = title;
            this.link = link;
            this.description = description;
            this.pubDate = pubDate;
            this.guid = guid;
        }

        public void setTitle(String title) {
            this.title = title;
        }

        public void setLink(String link) {
            this.link = link;
        }

        public void setDescription(String description) {
            this.description = description;
        }

        public void setPubDate(String pubDate) {
            this.pubDate = pubDate;
        }

        public void setGuid(String guid) {
            this.guid = guid;
        }

        public String getTitle() {
            return title;
        }

        public String getDescription() {
            return description;
        }

        public String getGuid() {
            return guid;
        }

        public String getLink() {
            return link;
        }

        public String getPubDate() {
            return pubDate;
        }
    }
}