import org.apache.xerces.parsers.*; import org.w3c.dom.*; import org.xml.sax.*; import java.io.IOException; import java.util.StringTokenizer; /** * DOM - example that counts the words of an xml input file * Call: % java DOMWordCount inputFile.xml */ public class DOMWordCount { public static void main(String[] args) { DOMParser parser = new DOMParser(); DOMWordCount counter = new DOMWordCount(); for (int i = 0; i < args.length; i++) { try { // Read the entire document into memory parser.parse(args[i]); Document d = parser.getDocument(); int numWords = countWordsInNode(d); System.out.println(numWords + " words"); } catch (SAXException e) { System.err.println(e); } catch (IOException e) { System.err.println(e); } } } // end main // note use of recursion public static int countWordsInNode(Node node) { int numWords = 0; if (node.hasChildNodes()) { NodeList children = node.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { numWords += countWordsInNode(children.item(i)); } } int type = node.getNodeType(); if (type == Node.TEXT_NODE) { String s = node.getNodeValue(); numWords += countWordsInString(s); } return numWords; } private static int countWordsInString(String s) { if (s == null) return 0; s = s.trim(); if (s.length() == 0) return 0; StringTokenizer st = new StringTokenizer(s); return st.countTokens(); } }