Source code for shakemap.utils.dataframe

# stdlib imports
import json
import re
import string
import time

# third party imports
import pandas as pd
import numpy as np
from lxml import etree
from openpyxl import load_workbook, utils

COMPONENTS = ["GREATER_OF_TWO_HORIZONTALS", "GEOMETRIC_MEAN", "ARITHMETIC MEAN"]
CHANNEL_PATTERNS = [
    "^[H,B][H,L,N][E,N,Z,1,2,3]$",  # match standard seed names
    "^H[1,2]$",  # match H1/H2
    "^Z$",
]  # match Z
PGM_COLS = ["PGA", "PGV", "SA(0.3)", "SA(1.0)", "SA(3.0)"]
OPTIONAL = [
    "NAME",
    "DISTANCE",
    "REFERENCE",
    "INTENSITY",
    "SOURCE",
    "LOC",
    "INSTTYPE",
    "ELEV",
    "NRESP",
    "STDDEV",
    "",
    "FLAG",
    "INSTRUMENT",
    "PERIOD",
    "SENSITIVITY",
    "SERIAL",
    "SOURCE_FORMAT",
    "STRUCTURE",
    "DAMPING",
]
FLOATRE = "[-+]?[0-9]*\.?[0-9]+"


[docs]def dataframe_to_xml(df, xmlfile, reference=None): """Write an MMI dataframe to ShakeMap XML format. This method accepts a dataframe with this structure: - STATION: Station code (REQUIRED) - LAT: Station latitude. (REQUIRED) - LON: Station longitude. (REQUIRED) - DISTANCE: Distance (km) from station to origin. - NETID: Network ID - SOURCE: Description of data contributor. - INTENSITY: MMI intensity. - NRESP: Number of responses for aggregated intensity. Args: df (DataFrame): Pandas dataframe, as described in read_excel. xmlfile (str): Path to file where XML file should be written. """ root = etree.Element("shakemap-data", code_version="3.5", map_version="3") create_time = int(time.time()) stationlist = etree.SubElement(root, "stationlist", created=f"{int(create_time):d}") if reference is not None: stationlist.attrib["reference"] = reference processed_stations = [] for _, row in df.iterrows(): tmprow = row.copy() # assign required columns stationcode = str(tmprow["STATION"]).strip() netid = tmprow["NETID"].strip() if not stationcode.startswith(netid): stationcode = f"{netid}.{stationcode}" # if this is a dataframe created by shakemap, # there will be multiple rows per station. # below we process all those rows at once, # so we need this bookkeeping to know that # we've already dealt with this station if stationcode in processed_stations: continue station = etree.SubElement(stationlist, "station") station.attrib["code"] = stationcode station.attrib["lat"] = f"{tmprow['LAT']:.4f}" station.attrib["lon"] = f"{tmprow['LON']:.4f}" # assign optional columns if "NETID" in tmprow: station.attrib["netid"] = tmprow["NETID"].strip() if "DISTANCE" in tmprow: station.attrib["dist"] = f"{tmprow['DISTANCE']:.1f}" if "INTENSITY" in tmprow: station.attrib["intensity"] = f"{tmprow['INTENSITY']:.1f}" if "STDDEV" in tmprow: station.attrib["intensity_stddev"] = f"{tmprow['STDDEV']:.4f}" if "NRESP" in tmprow: station.attrib["nresp"] = f"{int(tmprow['NRESP']):d}" if "SOURCE" in tmprow: station.attrib["source"] = tmprow["SOURCE"].strip() processed_stations.append(stationcode) tree = etree.ElementTree(root) tree.write(xmlfile, pretty_print=True)