Source code for shakemap.utils.dataframe

# stdlib imports
import json
import re
import string
import time

# third party imports
import pandas as pd
import numpy as np
from lxml import etree
from openpyxl import load_workbook, utils

# Names of the supported component types.
# NOTE(review): "ARITHMETIC MEAN" uses a space where the other entries use
# underscores -- confirm this is intentional before normalizing it.
COMPONENTS = ["GREATER_OF_TWO_HORIZONTALS", "GEOMETRIC_MEAN", "ARITHMETIC MEAN"]

# Regular expressions recognizing channel names. Inside a character class a
# comma is a literal character, not a separator, so the classes below list
# their members without commas (otherwise names such as "H,Z" would match).
CHANNEL_PATTERNS = [
    "^[HB][HLN][ENZ123]$",  # match standard seed names
    "^H[12]$",  # match H1/H2
    "^Z$",  # match Z
]

# Ground-motion column names: peak acceleration, peak velocity and spectral
# acceleration at three periods.
PGM_COLS = ["PGA", "PGV", "SA(0.3)", "SA(1.0)", "SA(3.0)"]

# Column names treated as optional.
# NOTE(review): the empty-string entry looks deliberate (presumably an
# unnamed spreadsheet column) -- confirm against the code that reads it.
OPTIONAL = [
    "NAME",
    "DISTANCE",
    "REFERENCE",
    "INTENSITY",
    "SOURCE",
    "LOC",
    "INSTTYPE",
    "ELEV",
    "NRESP",
    "STDDEV",
    "",
    "FLAG",
    "INSTRUMENT",
    "PERIOD",
    "SENSITIVITY",
    "SERIAL",
    "SOURCE_FORMAT",
    "STRUCTURE",
    "DAMPING",
]

# Pattern for a signed decimal number. Written as a raw string so that the
# backslash reaches the regex engine without relying on Python passing an
# unrecognized escape sequence through unchanged.
FLOATRE = r"[-+]?[0-9]*\.?[0-9]+"


def dataframe_to_xml(df, xmlfile, reference=None):
    """Write an MMI dataframe to ShakeMap XML format.

    This method accepts a dataframe with this structure:
     - STATION: Station code (REQUIRED)
     - LAT: Station latitude. (REQUIRED)
     - LON: Station longitude. (REQUIRED)
     - DISTANCE: Distance (km) from station to origin.
     - NETID: Network ID
     - SOURCE: Description of data contributor.
     - INTENSITY: MMI intensity.
     - STDDEV: Written as the "intensity_stddev" attribute.
     - NRESP: Number of responses for aggregated intensity.

    Columns not marked REQUIRED are only written when they are present in
    the dataframe. When NETID is present and the station code does not
    already start with it, the code is written as "<NETID>.<STATION>".
    Only the first row encountered for each resulting station code is
    written; later rows with the same code are skipped.

    Args:
        df (DataFrame): Pandas dataframe, as described in read_excel.
        xmlfile (str): Path to file where XML file should be written.
        reference (str): Optional value stored in the "reference" attribute
            of the stationlist element; omitted when None.
    """
    root = etree.Element("shakemap-data", code_version="3.5", map_version="3")

    create_time = int(time.time())
    stationlist = etree.SubElement(root, "stationlist", created=f"{create_time:d}")
    if reference is not None:
        stationlist.attrib["reference"] = reference

    # a set gives constant-time "already written?" checks
    processed_stations = set()

    for _, row in df.iterrows():
        # assign required columns
        stationcode = str(row["STATION"]).strip()

        # NETID is optional: only prefix the station code when it is present
        netid = row["NETID"].strip() if "NETID" in row else None
        if netid is not None and not stationcode.startswith(netid):
            stationcode = f"{netid}.{stationcode}"

        # if this is a dataframe created by shakemap,
        # there will be multiple rows per station.
        # below we process all those rows at once,
        # so we need this bookkeeping to know that
        # we've already dealt with this station
        if stationcode in processed_stations:
            continue
        processed_stations.add(stationcode)

        station = etree.SubElement(stationlist, "station")

        station.attrib["code"] = stationcode
        station.attrib["lat"] = f"{row['LAT']:.4f}"
        station.attrib["lon"] = f"{row['LON']:.4f}"

        # assign optional columns
        if netid is not None:
            station.attrib["netid"] = netid
        if "DISTANCE" in row:
            station.attrib["dist"] = f"{row['DISTANCE']:.1f}"
        if "INTENSITY" in row:
            station.attrib["intensity"] = f"{row['INTENSITY']:.1f}"
        if "STDDEV" in row:
            station.attrib["intensity_stddev"] = f"{row['STDDEV']:.4f}"
        if "NRESP" in row:
            station.attrib["nresp"] = f"{int(row['NRESP']):d}"
        if "SOURCE" in row:
            station.attrib["source"] = row["SOURCE"].strip()

    tree = etree.ElementTree(root)
    tree.write(xmlfile, pretty_print=True)
Source code for shakemap.utils.dataframe

ShakeMap Documentation

Navigation