XmlUtils.java

  1. /*
  2.  * XmlUtils
  3.  *
  4.  * $Id$
  5.  * $HeadURL$
  6.  */
  7. package gov.usgs.util;

  8. import java.io.IOException;
  9. import java.io.InputStream;
  10. import java.util.Calendar;
  11. import java.util.Date;
  12. import java.util.GregorianCalendar;
  13. import java.util.HashMap;
  14. import java.util.Iterator;
  15. import java.util.Map;
  16. import java.util.TimeZone;

  17. import javax.xml.datatype.DatatypeConstants;
  18. import javax.xml.datatype.XMLGregorianCalendar;
  19. import javax.xml.datatype.DatatypeFactory;

  20. import javax.xml.parsers.ParserConfigurationException;
  21. import javax.xml.parsers.SAXParser;
  22. import javax.xml.parsers.SAXParserFactory;

  23. import org.xml.sax.SAXException;
  24. import org.xml.sax.Attributes;
  25. import org.xml.sax.InputSource;
  26. import org.xml.sax.XMLReader;
  27. import org.xml.sax.helpers.DefaultHandler;

  28. /**
  29.  * Xml parsing utility functions.
  30.  *
  31.  * @author jmfee
  32.  *
  33.  */
  34. public class XmlUtils {

  35.     /** Hashmap of ESCAPES */
  36.     public static final Map<String, String> ESCAPES = new HashMap<String, String>();
  37.     static {
  38.         // xml
  39.         ESCAPES.put("&", "&amp;");
  40.         ESCAPES.put("<", "&lt;");
  41.         ESCAPES.put(">", "&gt;");
  42.         ESCAPES.put("\"", "&quot;");
  43.         ESCAPES.put("'", "&apos;");
  44.         // whitespace characters
  45.         ESCAPES.put("\t", "&#x9;"); // tab
  46.         ESCAPES.put("\n", "&#xA;"); // newline
  47.         ESCAPES.put("\r", "&#xD;"); // carriage return
  48.     }

  49.     /**
  50.      * Convenience method to format a Date as an XML DateTime String.
  51.      *
  52.      * @param date
  53.      *            the date to format.
  54.      * @return the XML representation as a string.
  55.      */
  56.     public static String formatDate(final Date date) {
  57.         if (date == null) {
  58.             return null;
  59.         }
  60.         GregorianCalendar calendar = new GregorianCalendar();
  61.         calendar.setTimeInMillis(date.getTime());
  62.         return formatGregorianCalendar(calendar);
  63.     }

  64.     /**
  65.      * Format a Gregorian Calendar as an XML DateTime String.
  66.      *
  67.      * @param calendar
  68.      *            the calendar to format.
  69.      * @return the XML representation as a string.
  70.      */
  71.     public static String formatGregorianCalendar(
  72.             final GregorianCalendar calendar) {
  73.         try {
  74.             return DatatypeFactory.newInstance()
  75.                     .newXMLGregorianCalendar(calendar).normalize()
  76.                     .toXMLFormat();
  77.         } catch (Exception e) {
  78.             return null;
  79.         }
  80.     }

  81.     /**
  82.      * Convenience method to parse an XML Date Time into a Date. Only useful
  83.      * when the XML Date Time is within the Date object time range.
  84.      *
  85.      * @param toParse
  86.      *            the xml date time string to parse.
  87.      * @return the parsed Date object.
  88.      */
  89.     public static Date getDate(final String toParse) {
  90.         XMLGregorianCalendar calendar = getXMLGregorianCalendar(toParse);
  91.         if (calendar != null) {
  92.             return new Date(calendar.toGregorianCalendar().getTimeInMillis());
  93.         } else {
  94.             return null;
  95.         }
  96.     }

  97.     /**
  98.      * Parse an XML Date Time into an XMLGregorianCalendar.
  99.      *
  100.      * @param toParse
  101.      *            the xml date time string to parse.
  102.      * @return the parsed XMLGregorianCalendar object.
  103.      */
  104.     public static XMLGregorianCalendar getXMLGregorianCalendar(
  105.             final String toParse) {
  106.         try {
  107.             return DatatypeFactory.newInstance().newXMLGregorianCalendar(
  108.                     toParse);
  109.         } catch (Exception e) {
  110.             return null;
  111.         }
  112.     }

  113.     /**
  114.      * Converts an XMLGregorianCalendar to a Date.
  115.      *
  116.      * @param xmlDate
  117.      *            XMLGregorianCalendar to convert.
  118.      * @return corresponding date object.
  119.      */
  120.     public static Date getDate(final XMLGregorianCalendar xmlDate) {
  121.         // is this equivalent to getDate(String) processing above??

  122.         // start with UTC, i.e. no daylight savings time.
  123.         TimeZone timezone = TimeZone.getTimeZone("GMT");

  124.         // adjust timezone to match xmldate
  125.         int offsetMinutes = xmlDate.getTimezone();
  126.         if (offsetMinutes != DatatypeConstants.FIELD_UNDEFINED) {
  127.             timezone.setRawOffset(
  128.             // convert minutes to milliseconds
  129.             offsetMinutes * 60 // seconds per minute
  130.             * 1000 // milliseconds per second
  131.             );
  132.         }

  133.         // use calendar so parsed date will be UTC
  134.         Calendar calendar = Calendar.getInstance(timezone);
  135.         calendar.clear();
  136.         calendar.set(xmlDate.getYear(),
  137.                 // xmlcalendar is 1 based, calender is 0 based
  138.                 xmlDate.getMonth() - 1, xmlDate.getDay(), xmlDate.getHour(),
  139.                 xmlDate.getMinute(), xmlDate.getSecond());
  140.         Date date = calendar.getTime();
  141.         int millis = xmlDate.getMillisecond();
  142.         if (millis != DatatypeConstants.FIELD_UNDEFINED) {
  143.             calendar.setTimeInMillis(calendar.getTimeInMillis() + millis);
  144.         }

  145.         return date;
  146.     }

  147.     /**
  148.      * Creates an XMLReader and uses handler as a content and error handler.
  149.      *
  150.      * @param xml
  151.      *            source of xml.
  152.      * @param handler
  153.      *            SAX handler for xml.
  154.      * @throws SAXException
  155.      *             if any exceptions occur during parsing.
  156.      * @throws IOException
  157.      *             if unable to convert xml to an inputstream.
  158.      * @throws ParserConfigurationException
  159.      *             if unable to create a namespace aware parser.
  160.      */
  161.     public static void parse(final Object xml, final DefaultHandler handler)
  162.             throws SAXException, IOException, ParserConfigurationException {
  163.         SAXParserFactory spf = SAXParserFactory.newInstance();
  164.         spf.setNamespaceAware(true);
  165.         SAXParser sp = spf.newSAXParser();
  166.         XMLReader xr = sp.getXMLReader();
  167.         xr.setContentHandler(handler);
  168.         xr.setErrorHandler(handler);
  169.         InputStream in = StreamUtils.getInputStream(xml);
  170.         try {
  171.             xr.parse(new InputSource(in));
  172.         } finally {
  173.             StreamUtils.closeStream(in);
  174.         }
  175.     }

  176.     /**
  177.      * Sometimes parsers do not preserve the namespace for attributes. This
  178.      * attempts to use the namespace and localname, and, if not available using
  179.      * the namespace, checks for an attribute using only localname.
  180.      *
  181.      * @param attributes
  182.      *            Attributes object to search.
  183.      * @param uri
  184.      *            namespace of attribute.
  185.      * @param localName
  186.      *            local name of attribute.
  187.      * @return value of attribute.
  188.      */
  189.     public static String getAttribute(final Attributes attributes,
  190.             final String uri, final String localName) {
  191.         String value = attributes.getValue(uri, localName);
  192.         if (value == null) {
  193.             value = attributes.getValue(localName);
  194.         }
  195.         return value;
  196.     }

  197.     /**
  198.      * Escape a value when writing XML.
  199.      *
  200.      * Replaces each character in the ESCAPES map with its escaped value.
  201.      *
  202.      * This method should only be used when generating xml manually, since most
  203.      * xml writers escape automatically.
  204.      *
  205.      * @param value
  206.      *            the value to escape
  207.      * @return the escaped value.
  208.      */
  209.     public static String escape(final String value) {
  210.         String escapedValue = value;

  211.         // replace each escapeable character
  212.         Iterator<String> iter = ESCAPES.keySet().iterator();
  213.         while (iter.hasNext()) {
  214.             String raw = iter.next();
  215.             String escaped = ESCAPES.get(raw);
  216.             escapedValue = escapedValue.replace(raw, escaped);
  217.         }

  218.         return escapedValue;
  219.     }

  220.     /**
  221.      * Unescape a value when reading XML.
  222.      *
  223.      * Replaces each escaped character in the ESCAPES map with its unescaped
  224.      * value.
  225.      *
  226.      * This method should only be used when parsing xml manually, since most xml
  227.      * parsers unescape automatically.
  228.      *
  229.      * @param value
  230.      *            the value to unescape
  231.      * @return the unescaped value.
  232.      */
  233.     public static String unescape(final String value) {
  234.         String unescapedValue = value;

  235.         // replace each escapeable character
  236.         Iterator<String> iter = ESCAPES.keySet().iterator();
  237.         while (iter.hasNext()) {
  238.             String raw = iter.next();
  239.             String escaped = ESCAPES.get(raw);
  240.             unescapedValue = unescapedValue.replace(escaped, raw);
  241.         }

  242.         return unescapedValue;
  243.     }
  244. }