DefaultAssociator.java

  1. /*
  2.  * DefaultAssociator
  3.  */
  4. package gov.usgs.earthquake.indexer;

  5. import java.math.BigDecimal;
  6. import java.util.ArrayList;
  7. import java.util.Date;
  8. import java.util.Iterator;
  9. import java.util.LinkedList;
  10. import java.util.List;
  11. import java.util.Map;
  12. import java.util.Set;

  13. import java.util.logging.Level;
  14. import java.util.logging.Logger;

  15. /**
  16.  * Utilities for associating events.
  17.  *
  18.  * Based on the QDM EQEventsUtils class.
  19.  */
  20. public class DefaultAssociator implements Associator {

  21.     private static final Logger LOGGER = Logger
  22.             .getLogger(DefaultAssociator.class.getName());

  23.     // time
  24.     /** Distance between related events in time, in milliseconds. */
  25.     public static final long TIME_DIFF_MILLISECONDS = 16 * 1000;

  26.     // space
  27.     /** Distance between related events in space, in kilometers. */
  28.     public static final BigDecimal LOCATION_DIFF_KILOMETER = new BigDecimal(100);

  29.     /** Number of kilometers in a degree at the equator. */
  30.     public static final BigDecimal KILOMETERS_PER_DEGREE = new BigDecimal("111.12");

  31.     /**
  32.      * Distance between related events latitude, in degrees.
  33.      *
  34.      * This is based on the max number of kilometers per degree, and provides
  35.      * the maximum latitude separation (assuming events share a longitude).
  36.      *
  37.      * Used as a pre-filter before more expensive checks.
  38.      */
  39.     public static final BigDecimal LOCATION_DIFF_DEGREES = new BigDecimal(
  40.             LOCATION_DIFF_KILOMETER.doubleValue()
  41.                     / KILOMETERS_PER_DEGREE.doubleValue());

  42.     /**
  43.      * Build an index search that searches for associated products. Products are
  44.      * considered associated if the eventid matches or their location is within
  45.      * a certain distance.
  46.      */
  47.     public SearchRequest getSearchRequest(ProductSummary summary) {
  48.         SearchRequest request = new SearchRequest();

  49.         // Order is important here. The eventId query must be added first
  50.         ProductIndexQuery eventIdQuery = getEventIdQuery(
  51.                 summary.getEventSource(), summary.getEventSourceCode());
  52.         if (eventIdQuery != null) {
  53.             request.addQuery(new EventDetailQuery(eventIdQuery));
  54.         }

  55.         // Now a query that looks for location
  56.         ProductIndexQuery locationQuery = getLocationQuery(
  57.                 summary.getEventTime(), summary.getEventLatitude(),
  58.                 summary.getEventLongitude());
  59.         if (locationQuery != null) {
  60.             request.addQuery(new EventDetailQuery(locationQuery));
  61.         }

  62.         return request;
  63.     }

  64.     /**
  65.      * Choose and return the most closely associated event.
  66.      *
  67.      * @param events
  68.      *            a list of candidate events.
  69.      * @param summary
  70.      *            the summary being associated.
  71.      * @return the best match event from the list of events.
  72.      */
  73.     public Event chooseEvent(final List<Event> events,
  74.             final ProductSummary summary) {
  75.         List<Event> filteredEvents = new LinkedList<Event>();

  76.         // remove events that are from the same source with a different code
  77.         String summarySource = summary.getEventSource();
  78.         String summaryCode = summary.getEventSourceCode();
  79.         if (summarySource == null || summaryCode == null) {
  80.             // can't check if same source with different code
  81.             filteredEvents = events;
  82.         } else {
  83.             // try to associate by event id
  84.             Iterator<Event> iter = events.iterator();
  85.             while (iter.hasNext()) {
  86.                 Event event = iter.next();

  87.                 boolean sameSourceDifferentCode = false;
  88.                 Iterator<ProductSummary> summaryIter;

  89.                 if (event.isDeleted()) {
  90.                     // ignore delete products before checking
  91.                     summaryIter = Event.getWithoutSuperseded(
  92.                             Event.getWithoutDeleted(event.getAllProductList())).iterator();
  93.                 } else {
  94.                     summaryIter = event.getProductList()
  95.                             .iterator();
  96.                 }
  97.                 while (summaryIter.hasNext()) {
  98.                     ProductSummary nextSummary = summaryIter.next();
  99.                     if (summarySource.equalsIgnoreCase(nextSummary
  100.                             .getEventSource())) {
  101.                         if (summaryCode.equalsIgnoreCase(nextSummary
  102.                                 .getEventSourceCode())) {
  103.                             // this is the event we are looking for! so stop
  104.                             // already
  105.                             return event;
  106.                         } else {
  107.                             // different event code from same source, probably a
  108.                             // different event. Don't give up yet, because
  109.                             // associate may force multiple codes from same
  110.                             // source in same event.
  111.                             sameSourceDifferentCode = true;
  112.                         }
  113.                     }
  114.                 }

  115.                 if (!sameSourceDifferentCode) {
  116.                     filteredEvents.add(event);
  117.                 }
  118.             }
  119.         }

  120.         // no events found
  121.         if (filteredEvents.size() == 0) {
  122.             return null;
  123.         }

  124.         // more than one event found
  125.         else if (filteredEvents.size() > 1) {
  126.             ArrayList<String> matches = new ArrayList<String>();
  127.             Iterator<Event> iter = filteredEvents.iterator();
  128.             while (iter.hasNext()) {
  129.                 Event match = iter.next();
  130.                 matches.add(match.getEventId());
  131.             }
  132.             LOGGER.log(Level.WARNING, "Potential merge, product id="
  133.                     + summary.getId().toString() + ", nearby events: "
  134.                     + matches.toString());

  135.             // Return the "closest" event
  136.             Event mostSimilar = chooseMostSimilar(summary, filteredEvents);
  137.             if (mostSimilar != null) {
  138.                 LOGGER.log(Level.FINE, "Associated product id="
  139.                         + summary.getId().toString() + ", to event id="
  140.                         + mostSimilar.getEventId());
  141.             }
  142.             return mostSimilar;
  143.         }

  144.         // one event found
  145.         else {
  146.             return filteredEvents.get(0);
  147.         }
  148.     }

  149.     /**
  150.      * For the given list of events, find the one that is "closest" to the given
  151.      * product. Similarity is calculated by first subtracting the event
  152.      * parameter from the product parameter, normalizing between 1 and -1, then
  153.      * calculating the Euclidean distance in the 3D space composed of the
  154.      * normalized lat, lon, and time vectors.
  155.      *
  156.      * @param summary ProductSummary to compare events with
  157.      * @param events List of events
  158.      * @return Event with lowest distance
  159.      */
  160.     protected Event chooseMostSimilar(ProductSummary summary, List<Event> events) {
  161.         double lowest = Double.POSITIVE_INFINITY;
  162.         Event bestMatch = null;

  163.         if (summary.getEventLatitude() == null
  164.                 || summary.getEventLongitude() == null
  165.                 || summary.getEventTime() == null) {
  166.             // cannot choose most similar
  167.             if (events.size() > 0) {
  168.                 // choose first
  169.                 return events.get(0);
  170.             } else {
  171.                 return null;
  172.             }
  173.         }

  174.         // find "closest" event
  175.         Iterator<Event> iter = events.iterator();
  176.         while (iter.hasNext()) {
  177.             Event event = iter.next();
  178.             try {
  179.                 EventSummary eventSummary = event.getEventSummary();
  180.                 // First get the difference between the lat, lon, and time
  181.                 double deltaLat = summary.getEventLatitude()
  182.                         .subtract(eventSummary.getLatitude()).doubleValue();
  183.                 double deltaLon = summary.getEventLongitude()
  184.                         .subtract(eventSummary.getLongitude()).doubleValue();
  185.                 double deltaTime = summary.getEventTime().getTime()
  186.                         - eventSummary.getTime().getTime();
  187.                 // Each of the deltas will now be between the range
  188.                 // -TIME_DIFF_MILLISECONDS to +TIME_DIFF_MILLISECONDS (or
  189.                 // whatever
  190.                 // the units are). To normalize, between -1 and 1, we just need
  191.                 // to
  192.                 // divide by TIME_DIFF_MILLISECONDS
  193.                 deltaLat = deltaLat / LOCATION_DIFF_DEGREES.doubleValue();
  194.                 deltaLon = deltaLon / LOCATION_DIFF_DEGREES.doubleValue();
  195.                 deltaTime = deltaTime / TIME_DIFF_MILLISECONDS;

  196.                 // Calculate the Euclidean distance between the summary and the
  197.                 // vector representing this event
  198.                 double distance = Math.sqrt(deltaLat * deltaLat + deltaLon
  199.                         * deltaLon + deltaTime * deltaTime);
  200.                 if (distance < lowest) {
  201.                     lowest = distance;
  202.                     bestMatch = event;
  203.                 }
  204.             } catch (Exception e) {
  205.                 LOGGER.log(Level.WARNING,
  206.                         "Exception checking for most similar event", e);
  207.                 // only log, but continue processing
  208.                 if (bestMatch == null) {
  209.                     // pick an event, but don't update "lowest"
  210.                     bestMatch = event;
  211.                 }
  212.             }
  213.         }

  214.         return bestMatch;
  215.     }

  216.     /**
  217.      * Check if two events are associated to each other.
  218.      *
  219.      * Reasons events may be considered disassociated:
  220.      * <ol>
  221.      * <li>Share a common EVENTSOURCE with different EVENTSOURCECODE.</li>
  222.      * <li>Either has a disassociate product for the other.</li>
  223.      * <li>Preferred location in space and time is NOT nearby, and no other
  224.      * reason to associate.</li>
  225.      * </ol>
  226.      *
  227.      * Reasons events may be considered associated:
  228.      * <ol>
  229.      * <li>Share a common EVENTID</li>
  230.      * <li>Either has an associate product for the other.</li>
  231.      * <li>Their preferred location in space and time is nearby.</li>
  232.      * </ol>
  233.      *
  234.      * @param event1
  235.      *            candidate event to test.
  236.      * @param event2
  237.      *            candidate event to test.
  238.      * @return true if associated, false otherwise.
  239.      */
  240.     @Override
  241.     public boolean eventsAssociated(Event event1, Event event2) {

  242.         // ---------------------------------------------------------//
  243.         // -- Is there an explicit association or disassocation? -- //
  244.         // ---------------------------------------------------------//

  245.         // check disassociation first
  246.         if (event1.hasDisassociateProduct(event2)
  247.                 || event2.hasDisassociateProduct(event1)) {
  248.             // explicitly disassociated
  249.             return false;
  250.         }

  251.         // associate overrides usual event source rules.
  252.         if (event1.hasAssociateProduct(event2)
  253.                 || event2.hasAssociateProduct(event1)) {
  254.             // explicitly associated
  255.             return true;
  256.         }

  257.         EventSummary event1Summary = event1.getEventSummary();
  258.         EventSummary event2Summary = event2.getEventSummary();

  259.         // ---------------------------------- //
  260.         // -- Do events share an eventid ? -- //
  261.         // ---------------------------------- //
  262.         // this check happens after associate and disassociate to allow two
  263.         // events from the same source to be forced to associate
  264.         // (bad network, bad)

  265.         // THIS CHECKS PREFERRED EVENT ID
  266.         // if source is same, check code
  267.         String event1Source = event1Summary.getSource();
  268.         String event2Source = event2Summary.getSource();
  269.         if (event1Source != null && event2Source != null
  270.                 && event1Source.equalsIgnoreCase(event2Source)) {
  271.             String event1Code = event1Summary.getSourceCode();
  272.             String event2Code = event2Summary.getSourceCode();
  273.             // this is somewhat implied, (preferred source+code are
  274.             // combination) but be safe anyways
  275.             if (event1Code != null && event2Code != null) {
  276.                 if (event1Code.equalsIgnoreCase(event2Code)) {
  277.                     // same event id
  278.                     return true;
  279.                 } else {
  280.                     // different event id from same source
  281.                     return false;
  282.                 }
  283.             }
  284.         }

  285.         // THIS CHECKS NON-PREFERRED EVENT IDS Map<String, String>
  286.         // ignore deleted sub events for this comparison
  287.         Map<String, List<String>> event1Codes = event1
  288.                 .getAllEventCodes(false);
  289.         Map<String, List<String>> event2Codes = event2
  290.                 .getAllEventCodes(false);
  291.         Set<String> commonSources = event1Codes.keySet();
  292.         commonSources.retainAll(event2Codes.keySet());

  293.         Iterator<String> eventSourceIter = commonSources.iterator();
  294.         while (eventSourceIter.hasNext()) {
  295.             String source = eventSourceIter.next();
  296.             List<String> event1SourceCodes = event1Codes.get(source);
  297.             List<String> event2SourceCodes = event2Codes.get(source);

  298.             Iterator<String> iter = event1SourceCodes.iterator();
  299.             while (iter.hasNext()) {
  300.                 if (!event2SourceCodes.contains(iter.next())) {
  301.                     return false;
  302.                 }
  303.             }

  304.             iter = event1SourceCodes.iterator();
  305.             while (iter.hasNext()) {
  306.                 if (!event1SourceCodes.contains(iter.next())) {
  307.                     return false;
  308.                 }
  309.             }
  310.         }

  311.         // --------------------------------------------------- //
  312.         // -- Are event locations (lat/lon/time) "nearby" ? -- //
  313.         // --------------------------------------------------- //
  314.         if (queryContainsLocation(
  315.                 getLocationQuery(event1Summary.getTime(), event1Summary.getLatitude(),
  316.                         event1Summary.getLongitude()), event2Summary.getTime(),
  317.                 event2Summary.getLatitude(), event2Summary.getLongitude())) {
  318.             // location matches
  319.             return true;
  320.         }

  321.         return false;
  322.     }

  323.     /**
  324.      * Build a ProductIndexQuery that searches based on event id.
  325.      *
  326.      * @param eventSource
  327.      *            the eventSource to search
  328.      * @param eventCode
  329.      *            the eventCode to search
  330.      * @return null if eventSource or eventCode are null, otherwise a
  331.      *         ProductIndexQuery. A returned ProductIndexQuery will have
  332.      *         EventSearchType SEARCH_EVENT_PREFERRED and ResultType
  333.      *         RESULT_TYPE_ALL.
  334.      */
  335.     @Override
  336.     public ProductIndexQuery getEventIdQuery(final String eventSource,
  337.             final String eventCode) {
  338.         ProductIndexQuery query = null;

  339.         if (eventSource != null && eventCode != null) {
  340.             query = new ProductIndexQuery();
  341.             // search all products, not just preferred (in case the preferred is
  342.             // a delete)
  343.             query.setEventSearchType(ProductIndexQuery.SEARCH_EVENT_PRODUCTS);
  344.             query.setResultType(ProductIndexQuery.RESULT_TYPE_ALL);

  345.             query.setEventSource(eventSource);
  346.             query.setEventSourceCode(eventCode);

  347.             query.log(LOGGER);
  348.         }

  349.         return query;
  350.     }

  351.     /**
  352.      * Build a ProductIndexQuery that searches based on location.
  353.      *
  354.      *
  355.      * @param time
  356.      *            the time to search around.
  357.      * @param latitude
  358.      *            the latitude to search around.
  359.      * @param longitude
  360.      *            the longitude to search around.
  361.      * @return null if time, latitude, or longitude are null, otherwise a
  362.      *         ProductIndexQuery. A returned ProductIndexQuery will have
  363.      *         EventSearchType SEARCH_EVENT_PREFERRED and ResultType
  364.      *         RESULT_TYPE_ALL.
  365.      */
  366.     @Override
  367.     public ProductIndexQuery getLocationQuery(final Date time,
  368.             final BigDecimal latitude, final BigDecimal longitude) {
  369.         ProductIndexQuery query = null;
  370.         if (time != null && latitude != null && longitude != null) {
  371.             query = new ProductIndexQuery();

  372.             // search all products, not just preferred (in case the preferred is
  373.             // a delete)
  374.             query.setEventSearchType(ProductIndexQuery.SEARCH_EVENT_PREFERRED);
  375.             query.setResultType(ProductIndexQuery.RESULT_TYPE_ALL);

  376.             // time
  377.             query.setMinEventTime(new Date(time.getTime()
  378.                     - TIME_DIFF_MILLISECONDS));
  379.             query.setMaxEventTime(new Date(time.getTime()
  380.                     + TIME_DIFF_MILLISECONDS));

  381.             // latitude
  382.             query.setMinEventLatitude(latitude.subtract(LOCATION_DIFF_DEGREES));
  383.             query.setMaxEventLatitude(latitude.add(LOCATION_DIFF_DEGREES));

  384.             // longitude
  385.             double lat = latitude.abs().doubleValue();
  386.             if (lat < 89.0) {
  387.                 // only restrict longitude when not close to a pole...
  388.                 BigDecimal adjustedLongitudeDiff = new BigDecimal(
  389.                         LOCATION_DIFF_DEGREES.doubleValue()
  390.                                 / Math.cos(Math.toRadians(lat)));
  391.                 query.setMinEventLongitude(longitude
  392.                         .subtract(adjustedLongitudeDiff));
  393.                 query.setMaxEventLongitude(longitude.add(adjustedLongitudeDiff));

  394.                 /* make sure to compare across date/time line */
  395.                 JDBCProductIndex jdbcProductIndex = null;
  396.                 try {
  397.                     jdbcProductIndex = new JDBCProductIndex();
  398.                 } catch (Exception e) {
  399.                     e.printStackTrace();
  400.                 }

  401.                 BigDecimal minLon = query.getMinEventLongitude();
  402.                 BigDecimal maxLon = query.getMaxEventLongitude();

  403.                 // Normalize the longitudes between -180 and 180
  404.                 query.setMinEventLongitude(jdbcProductIndex
  405.                         .normalizeLongitude(minLon));
  406.                 query.setMaxEventLongitude(jdbcProductIndex
  407.                         .normalizeLongitude(maxLon));

  408.             }

  409.             query.log(LOGGER);
  410.         }

  411.         return query;
  412.     }

  413.     /**
  414.      * Check if a location would be matched by a ProductIndexQuery.
  415.      *
  416.      * @param query
  417.      *            location query
  418.      * @param time
  419.      *            time to check
  420.      * @param latitude
  421.      *            latitude to check
  422.      * @param longitude
  423.      *            longitude to check
  424.      * @return false if query, time, latitude, or longitude are null, or if
  425.      *         min/max time, latitude, longitude are set and do not match time,
  426.      *         latitude, or longitude. otherwise, true.
  427.      */
  428.     protected boolean queryContainsLocation(final ProductIndexQuery query,
  429.             final Date time, final BigDecimal latitude,
  430.             final BigDecimal longitude) {

  431.         if (query == null || time == null || latitude == null
  432.                 || longitude == null) {
  433.             // no query or location? no contains
  434.             return false;
  435.         }

  436.         if (query.getMinEventTime() != null
  437.                 && query.getMinEventTime().after(time)) {
  438.             // time too early
  439.             return false;
  440.         }
  441.         if (query.getMaxEventTime() != null
  442.                 && query.getMaxEventTime().before(time)) {
  443.             // time too late
  444.             return false;
  445.         }

  446.         if (query.getMinEventLatitude() != null
  447.                 && query.getMinEventLatitude().compareTo(latitude) > 0) {
  448.             // latitude too small
  449.             return false;
  450.         }
  451.         if (query.getMaxEventLatitude() != null
  452.                 && query.getMaxEventLatitude().compareTo(latitude) < 0) {
  453.             // latitude too large
  454.             return false;
  455.         }

  456.         if (query.getMinEventLongitude() != null
  457.                 && query.getMaxEventLongitude() != null) {

  458.             /*
  459.              * longitude range check for min & max longitude when the
  460.              * locationQuery spans the date line
  461.              */
  462.             if (query.getMinEventLongitude().compareTo(
  463.                     query.getMaxEventLongitude()) > 0) {

  464.                 boolean inBounds = false;

  465.                 // MAX:: getMaxLongitude < longitude <= -180
  466.                 if (longitude.compareTo(query.getMaxEventLongitude()) < 0
  467.                         && longitude.compareTo(new BigDecimal("-180")) >= 0) {
  468.                     inBounds = true;
  469.                 }

  470.                 // MIN:: 180 >= longitude > getMinEventLongitude
  471.                 if (longitude.compareTo(query.getMinEventLongitude()) > 0
  472.                         && longitude.compareTo(new BigDecimal("180")) <= 0) {
  473.                     inBounds = true;
  474.                 }

  475.                 if (!inBounds) {
  476.                     return false;
  477.                 }

  478.             } else {

  479.                 if (query.getMinEventLongitude().compareTo(longitude) > 0) {
  480.                     // longitude too small
  481.                     return false;
  482.                 }
  483.                 if (query.getMaxEventLongitude().compareTo(longitude) < 0) {
  484.                     // longitude too large
  485.                     return false;
  486.                 }
  487.             }
  488.         }

  489.         // must contain location
  490.         return true;
  491.     }
  492. }