XmlUtils.java
/*
* XmlUtils
*
* $Id$
* $HeadURL$
*/
package gov.usgs.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.TimeZone;
import javax.xml.datatype.DatatypeConstants;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
* Xml parsing utility functions.
*
* @author jmfee
*
*/
public class XmlUtils {
/** Hashmap of ESCAPES */
public static final Map<String, String> ESCAPES = new HashMap<String, String>();
static {
// xml
ESCAPES.put("&", "&");
ESCAPES.put("<", "<");
ESCAPES.put(">", ">");
ESCAPES.put("\"", """);
ESCAPES.put("'", "'");
// whitespace characters
ESCAPES.put("\t", "	"); // tab
ESCAPES.put("\n", "
"); // newline
ESCAPES.put("\r", "
"); // carriage return
}
/**
* Convenience method to format a Date as an XML DateTime String.
*
* @param date
* the date to format.
* @return the XML representation as a string.
*/
public static String formatDate(final Date date) {
if (date == null) {
return null;
}
GregorianCalendar calendar = new GregorianCalendar();
calendar.setTimeInMillis(date.getTime());
return formatGregorianCalendar(calendar);
}
/**
* Format a Gregorian Calendar as an XML DateTime String.
*
* @param calendar
* the calendar to format.
* @return the XML representation as a string.
*/
public static String formatGregorianCalendar(
final GregorianCalendar calendar) {
try {
return DatatypeFactory.newInstance()
.newXMLGregorianCalendar(calendar).normalize()
.toXMLFormat();
} catch (Exception e) {
return null;
}
}
/**
* Convenience method to parse an XML Date Time into a Date. Only useful
* when the XML Date Time is within the Date object time range.
*
* @param toParse
* the xml date time string to parse.
* @return the parsed Date object.
*/
public static Date getDate(final String toParse) {
XMLGregorianCalendar calendar = getXMLGregorianCalendar(toParse);
if (calendar != null) {
return new Date(calendar.toGregorianCalendar().getTimeInMillis());
} else {
return null;
}
}
/**
* Parse an XML Date Time into an XMLGregorianCalendar.
*
* @param toParse
* the xml date time string to parse.
* @return the parsed XMLGregorianCalendar object.
*/
public static XMLGregorianCalendar getXMLGregorianCalendar(
final String toParse) {
try {
return DatatypeFactory.newInstance().newXMLGregorianCalendar(
toParse);
} catch (Exception e) {
return null;
}
}
/**
* Converts an XMLGregorianCalendar to a Date.
*
* @param xmlDate
* XMLGregorianCalendar to convert.
* @return corresponding date object.
*/
public static Date getDate(final XMLGregorianCalendar xmlDate) {
// is this equivalent to getDate(String) processing above??
// start with UTC, i.e. no daylight savings time.
TimeZone timezone = TimeZone.getTimeZone("GMT");
// adjust timezone to match xmldate
int offsetMinutes = xmlDate.getTimezone();
if (offsetMinutes != DatatypeConstants.FIELD_UNDEFINED) {
timezone.setRawOffset(
// convert minutes to milliseconds
offsetMinutes * 60 // seconds per minute
* 1000 // milliseconds per second
);
}
// use calendar so parsed date will be UTC
Calendar calendar = Calendar.getInstance(timezone);
calendar.clear();
calendar.set(xmlDate.getYear(),
// xmlcalendar is 1 based, calender is 0 based
xmlDate.getMonth() - 1, xmlDate.getDay(), xmlDate.getHour(),
xmlDate.getMinute(), xmlDate.getSecond());
Date date = calendar.getTime();
int millis = xmlDate.getMillisecond();
if (millis != DatatypeConstants.FIELD_UNDEFINED) {
calendar.setTimeInMillis(calendar.getTimeInMillis() + millis);
}
return date;
}
/**
* Creates an XMLReader and uses handler as a content and error handler.
*
* @param xml
* source of xml.
* @param handler
* SAX handler for xml.
* @throws SAXException
* if any exceptions occur during parsing.
* @throws IOException
* if unable to convert xml to an inputstream.
* @throws ParserConfigurationException
* if unable to create a namespace aware parser.
*/
public static void parse(final Object xml, final DefaultHandler handler)
throws SAXException, IOException, ParserConfigurationException {
SAXParserFactory spf = SAXParserFactory.newInstance();
spf.setNamespaceAware(true);
SAXParser sp = spf.newSAXParser();
XMLReader xr = sp.getXMLReader();
xr.setContentHandler(handler);
xr.setErrorHandler(handler);
InputStream in = StreamUtils.getInputStream(xml);
try {
xr.parse(new InputSource(in));
} finally {
StreamUtils.closeStream(in);
}
}
/**
* Sometimes parsers do not preserve the namespace for attributes. This
* attempts to use the namespace and localname, and, if not available using
* the namespace, checks for an attribute using only localname.
*
* @param attributes
* Attributes object to search.
* @param uri
* namespace of attribute.
* @param localName
* local name of attribute.
* @return value of attribute.
*/
public static String getAttribute(final Attributes attributes,
final String uri, final String localName) {
String value = attributes.getValue(uri, localName);
if (value == null) {
value = attributes.getValue(localName);
}
return value;
}
/**
* Escape a value when writing XML.
*
* Replaces each character in the ESCAPES map with its escaped value.
*
* This method should only be used when generating xml manually, since most
* xml writers escape automatically.
*
* @param value
* the value to escape
* @return the escaped value.
*/
public static String escape(final String value) {
String escapedValue = value;
// replace each escapeable character
Iterator<String> iter = ESCAPES.keySet().iterator();
while (iter.hasNext()) {
String raw = iter.next();
String escaped = ESCAPES.get(raw);
escapedValue = escapedValue.replace(raw, escaped);
}
return escapedValue;
}
/**
* Unescape a value when reading XML.
*
* Replaces each escaped character in the ESCAPES map with its unescaped
* value.
*
* This method should only be used when parsing xml manually, since most xml
* parsers unescape automatically.
*
* @param value
* the value to unescape
* @return the unescaped value.
*/
public static String unescape(final String value) {
String unescapedValue = value;
// replace each escapeable character
Iterator<String> iter = ESCAPES.keySet().iterator();
while (iter.hasNext()) {
String raw = iter.next();
String escaped = ESCAPES.get(raw);
unescapedValue = unescapedValue.replace(escaped, raw);
}
return unescapedValue;
}
}