#!/usr/bin/env python
# -*- coding: utf-8 -*-

# stdlib imports
import collections
import os
import warnings

# third-party imports
import numpy as np
import matplotlib.pyplot as plt
from numpy import matlib
from scipy.optimize import curve_fit
from scipy.signal import convolve
from scipy.stats import beta
from configobj import ConfigObj

# local imports
from mapio.shake import ShakeGrid
from mapio.gdal import GDALGrid
from mapio.reader import read

# Turn off warnings that will pop up regarding nan's in greater-than operations
warnings.filterwarnings("ignore")


def computeStats(
grid2D,
stdgrid2D=None,
shakefile=None,
shakethreshtype="pga",
shakethresh=0.0,
probthresh=None,
pop_file=None,
stdtype="full",
maxP=1.0,
proj="moll",
):
"""
Compute summary stats of a ground failure model output.
Args:
grid2D: grid2D object of model output.
stdgrid2D: grid2D object of model standard deviations (optional)
shakefile: Optional, path to shakemap file to use for ground motion
threshold.
shakethreshtype: Optional, Type of ground motion to use for
shakethresh, 'pga', 'pgv', or 'mmi'.
shakethresh: Optional, Float in %g for
pga, cm/s for pgv, float for mmi. Used for Hagg and Exposure
computation
probthresh: Optional, None or float, exclude any cells with
probabilities less than or equal to this value
pop_file (str): File path to population file to use to compute exposure
stats
        stdtype (str): assumption of spatial correlation used to compute
            the stdev of the statistics: 'max', 'min', 'mean' of max and min,
            or 'full' (default), which estimates the std accounting for
            covariance
maxP (float): maximum possible value of P (1 default, but coverage
models have smaller values, 0.487 and 0.256 for LQ and LS)
proj (str): projection string to use when computing stats
Returns:
dict: Dictionary with all or some of the following keys
(depending on input options):
- Max
- Median
- Std
- hagg_# where # is the shaking threshold input
- hagg_std_# standard deviation of hagg_#
- hlim_# maximum possible value of hagg (for given Pmax value)
            - p_hagg_# beta distribution shape factor p for hagg
            - q_hagg_# beta distribution shape factor q for hagg
            - exp_pop_# where # is the shaking threshold (if pop_file specified)
            - exp_std_# (optional) standard deviation of exp_pop_#
            - elim_# maximum possible value of E (for given Pmax value)
            - p_exp_# beta distribution shape factor p for exp_pop
            - q_exp_# beta distribution shape factor q for exp_pop
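
    Example:
        A minimal usage sketch (hypothetical path; any mapio grid2D object
        of model output will do)::

            from mapio.reader import read
            model_grid = read('/path/to/model_output.tif')  # hypothetical
            stats = computeStats(model_grid, probthresh=0.01)
            print(stats['Max'], stats['Median'], stats['Std'])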
"""
stats = collections.OrderedDict()
grid = grid2D.getData().copy()
if probthresh is not None:
grid = grid[grid > probthresh]
else:
probthresh = 0.0
if len(grid) == 0:
print("no probability values above probthresh")
stats["Max"] = 0.0 # float('nan')
stats["Median"] = 0.0 # float('nan')
stats["Std"] = 0.0 # float('nan')
else:
stats["Max"] = float(np.nanmax(grid))
stats["Median"] = float(np.nanmedian(grid))
stats["Std"] = float(np.nanstd(grid))
    hagg_dict = computeHagg(
        grid2D,
        probthresh=probthresh,
        shakefile=shakefile,
        shakethreshtype=shakethreshtype,
        stdtype=stdtype,
        shakethresh=shakethresh,
        stdgrid2D=stdgrid2D,
        maxP=maxP,
        sill1=None,
        range1=None,
        proj=proj,
    )
    for k, v in hagg_dict.items():
        stats[k] = v
if pop_file is None:
try:
# Try to find population file in .gfail_defaults
default_file = os.path.join(os.path.expanduser("~"), ".gfail_defaults")
defaults = ConfigObj(default_file)
pop_file = defaults["popfile"]
        except Exception:
            print(
                "No population file specified and none found in "
                ".gfail_defaults, skipping exp_pop"
            )
if pop_file is not None:
exp_dict = computePexp(
grid2D,
pop_file,
shakefile=shakefile,
shakethreshtype=shakethreshtype,
shakethresh=shakethresh,
probthresh=probthresh,
stdgrid2D=stdgrid2D,
stdtype=stdtype,
maxP=maxP,
sill1=None,
range1=None,
)
for k, v in exp_dict.items():
stats[k] = v
return stats


def computeHagg(
grid2D,
proj="moll",
probthresh=0.0,
shakefile=None,
shakethreshtype="pga",
shakethresh=0.0,
stdgrid2D=None,
stdtype="full",
maxP=1.0,
sill1=None,
range1=None,
):
"""
Computes the Aggregate Hazard (Hagg) which is equal to the
probability * area of grid cell For models that compute areal coverage,
this is equivalant to the total predicted area affected in km2.
Args:
grid2D: grid2D object of model output.
proj: projection to use to obtain equal area, 'moll' mollweide, or
'laea' lambert equal area.
probthresh: Probability threshold, any values less than this will not
be included in aggregate hazard estimation.
shakefile: Optional, path to shakemap file to use for ground motion
threshold.
shakethreshtype: Optional, Type of ground motion to use for
shakethresh, 'pga', 'pgv', or 'mmi'.
        shakethresh: Optional, Float, shaking threshold in %g for
            pga, cm/s for pgv, float for mmi.
stdgrid2D: grid2D object of model standard deviations (optional)
        stdtype (str): assumption of spatial correlation used to compute
            the stdev of the statistics, 'max', 'min', 'mean' of max and min,
            or 'full' (default) which estimates the range of correlation and
            accounts for covariance. Will return 'mean' if
            range and sill cannot be estimated.
maxP (float): the maximum possible probability of the model
sill1 (float, None): If known, the sill of the variogram of grid2D,
will be estimated if None and stdtype='full'
range1 (float, None): If known, the range of the variogram of grid2D,
will be estimated if None and stdtype='full'
    Returns:
        dict: Dictionary with keys:
            hagg_#g where # is the shakethresh
            hagg_std_#g if stdgrid2D is supplied (stdev of hagg_#g)
            hlim_#g, the maximum possible hagg value with the
                applied thresholds and given maxP value
            cell_area_km2 grid cell area
            p_hagg_#g beta distribution shape factor p (sometimes called alpha)
            q_hagg_#g beta distribution shape factor q (sometimes called beta)
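
    Example:
        Key names embed shakethresh / 100, so shakethresh=10.0 (%g) yields
        keys like 'hagg_0.10g'. A minimal sketch (model_grid as in
        computeStats; the shakemap path is hypothetical)::

            out = computeHagg(model_grid, shakethresh=10.0,
                              shakefile='/path/to/grid.xml')  # hypothetical
            print(out['hagg_0.10g'], out['cell_area_km2'])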
"""
bounds = grid2D.getBounds()
lat0 = np.mean((bounds[2], bounds[3]))
lon0 = np.mean((bounds[0], bounds[1]))
projs = (
"+proj=%s +lat_0=%f +lon_0=%f +x_0=0 +y_0=0 +ellps=WGS84 "
"+units=km +no_defs" % (proj, lat0, lon0)
)
geodict = grid2D.getGeoDict()
if shakefile is not None:
        if shakethresh < 0.0:
            raise Exception("shaking threshold must be greater than or equal to zero")
# resample shakemap to grid2D
temp = ShakeGrid.load(
shakefile,
samplegeodict=geodict,
resample=True,
doPadding=True,
method="linear",
adjust="res",
)
shk = temp.getLayer(shakethreshtype)
# shk = shk.interpolate2(geodict)
if shk.getGeoDict() != geodict:
raise Exception(
"shakemap was not resampled to exactly the same geodict as the model"
)
    if probthresh < 0.0:
        raise Exception("probability threshold must be greater than or equal to zero")
grid = grid2D.project(projection=projs, method="bilinear")
geodictRS = grid.getGeoDict()
cell_area_km2 = geodictRS.dx * geodictRS.dy
model = grid.getData().copy()
Hagg = {}
if shakefile is not None:
shkgrid = shk.project(projection=projs)
shkdat = shkgrid.getData()
model[shkdat < shakethresh] = float("nan")
else:
shakethresh = 0.0
shkdat = None
mu = np.nansum(model[model >= probthresh] * cell_area_km2)
Hagg["hagg_%1.2fg" % (shakethresh / 100.0,)] = mu
Hagg["cell_area_km2"] = cell_area_km2
    N = np.sum(model >= probthresh)
# N = np.nansum([model >= 0.])
# Hagg['N_%1.2fg' % (shakethresh/100.,)] = N
hlim = cell_area_km2 * N * maxP
Hagg["hlim_%1.2fg" % (shakethresh / 100.0,)] = hlim
if stdgrid2D is not None:
stdgrid = GDALGrid.copyFromGrid(stdgrid2D) # Make a copy
stdgrid = stdgrid.project(projection=projs, method="bilinear")
std = stdgrid.getData().copy()
if shakefile is not None: # Nan out areas where shaking is too low also
std[shkdat < shakethresh] = float("nan")
Hagg["hagg_range"] = None
Hagg["hagg_sill"] = None
if np.nanmax(std) > 0.0 and np.nanmax(model) >= probthresh:
            totalmin = cell_area_km2 * np.sqrt(
                np.nansum((std[model >= probthresh]) ** 2.0)
            )
totalmax = np.nansum(std[model >= probthresh] * cell_area_km2)
if stdtype == "full":
if sill1 is None or range1 is None:
                    # Determine correct range to search by width of the area
                    # that has non-nan values
cols = np.where(np.sum(~np.isnan(model), axis=0) > 0.0)
rows = np.where(np.sum(~np.isnan(model), axis=1) > 0.0)
maxlag = int(
np.min(
[
(np.max(cols) - np.min(cols)),
(np.max(rows) - np.min(rows)),
]
)
/ 2.0
)
try2 = (
int(
np.max(
[
(np.max(cols) - np.min(cols)),
(np.max(rows) - np.min(rows)),
]
)
)
+ 100
)
if maxlag < 100:
maxlag = 100
if maxlag > 800:
maxlag = 800
# print('maxlag %d' % maxlag)
range1, sill1 = semivario(
grid.getData().copy(),
probthresh,
shakethresh=shakethresh,
shakegrid=shkdat,
maxlag=maxlag,
)
# If sill not found, expand maxlag
if range1 is not None: # If None, not enough points, skip ahead
if maxlag - range1 < 1.0:
print("no sill found, expanding maxlag to %d" % try2)
range1, sill1 = semivario(
grid.getData().copy(),
probthresh,
shakethresh=shakethresh,
shakegrid=shkdat,
maxlag=try2,
)
if range1 is not None:
if try2 - range1 < 1.0:
print(
"No sill found in semivariogram even after "
"expanding maxlag, assuming max uncertainty"
)
range1 = None
sill1 = None
Hagg["hagg_range"] = range1
Hagg["hagg_sill"] = sill1
                if range1 is None:
                    # Use max because no sill found or probs are super low
                    Hagg["hagg_std_%1.2fg" % (shakethresh / 100.0,)] = totalmax
else:
# Zero out std at cells where the model probability was
# below the threshold because we aren't including those
# cells in Hagg
stdz = std.copy()
stdz[model < probthresh] = 0.0
svar1 = svar(stdz, range1, sill1, scale=cell_area_km2)
Hagg["hagg_std_%1.2fg" % (shakethresh / 100.0,)] = np.sqrt(svar1)
# Hagg['hagg_range_%1.2fg' % (shakethresh/100.,)] = range1
# Hagg['hagg_sill_%1.2fg' % (shakethresh/100.,)] = sill1
elif stdtype == "max":
Hagg["hagg_std_%1.2fg" % (shakethresh / 100.0,)] = totalmax
elif stdtype == "min":
Hagg["hagg_std_%1.2fg" % (shakethresh / 100.0,)] = totalmin
else:
Hagg["hagg_std_%1.2fg" % (shakethresh / 100.0,)] = (
totalmax + totalmin
) / 2.0
var = Hagg["hagg_std_%1.2fg" % (shakethresh / 100.0,)] ** 2.0
# Beta distribution shape factors
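            # Method-of-moments fit of a beta distribution scaled to
            # [0, hlim]: with m = mu / hlim and v = var / hlim**2, the
            # standard relations p = m * (m * (1 - m) / v - 1) and
            # q = (1 - m) * (m * (1 - m) / v - 1) reduce to the lines below,
            # since m * (1 - m) / v = (hlim * mu - mu**2) / var.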
ph = (mu / hlim) * ((hlim * mu - mu**2) / var - 1)
qh = (1 - mu / hlim) * ((hlim * mu - mu**2) / var - 1)
Hagg["p_hagg_%1.2fg" % (shakethresh / 100.0,)] = ph
Hagg["q_hagg_%1.2fg" % (shakethresh / 100.0,)] = qh
# Compute 1 and 2 std ranges
if ph > 0.0 and qh > 0.0:
Hagg["hagg_1std_range_%1.2fg" % (shakethresh / 100.0,)] = get_rangebeta(
ph, qh, prob=0.6827, minlim=0.0, maxlim=hlim
)
Hagg["hagg_2std_range_%1.2fg" % (shakethresh / 100.0,)] = get_rangebeta(
ph, qh, prob=0.9545, minlim=0.0, maxlim=hlim
)
else:
Hagg["hagg_1std_range_%1.2fg" % (shakethresh / 100.0,)] = None
Hagg["hagg_2std_range_%1.2fg" % (shakethresh / 100.0,)] = None
else:
            print(
                "No model values above threshold or no std values above "
                "zero, skipping uncertainty and filling with zeros"
            )
Hagg["hagg_std_%1.2fg" % (shakethresh / 100.0,)] = 0.0
Hagg["p_hagg_%1.2fg" % (shakethresh / 100.0,)] = 0.0
Hagg["q_hagg_%1.2fg" % (shakethresh / 100.0,)] = 0.0
Hagg["hagg_1std_range_%1.2fg" % (shakethresh / 100.0,)] = None
Hagg["hagg_2std_range_%1.2fg" % (shakethresh / 100.0,)] = None
else:
print("No uncertainty provided, filling with zeros")
Hagg["hagg_std_%1.2fg" % (shakethresh / 100.0,)] = 0.0
Hagg["p_hagg_%1.2fg" % (shakethresh / 100.0,)] = 0.0
Hagg["q_hagg_%1.2fg" % (shakethresh / 100.0,)] = 0.0
Hagg["hagg_1std_range_%1.2fg" % (shakethresh / 100.0,)] = None
Hagg["hagg_2std_range_%1.2fg" % (shakethresh / 100.0,)] = None
return Hagg


def computePexp(
grid,
pop_file,
shakefile=None,
shakethreshtype="pga",
shakethresh=0.0,
probthresh=0.0,
stdgrid2D=None,
stdtype="full",
maxP=1.0,
sill1=None,
range1=None,
):
"""
Get exposure-based statistics.
Args:
grid: Model grid.
pop_file (str): Path to the landscan population grid.
shakefile (str): Optional, path to shakemap file to use for ground
motion threshold.
shakethreshtype(str): Optional, Type of ground motion to use for
shakethresh, 'pga', 'pgv', or 'mmi'.
        shakethresh: Float, shaking threshold in %g for
            pga, cm/s for pgv, float for mmi.
probthresh: Float, exclude any cells with
probabilities less than or equal to this value
stdgrid2D: grid2D object of model standard deviations (optional)
        stdtype (str): assumption of spatial correlation used to compute
            the stdev of the statistics, 'max', 'min', 'mean' of max and min,
            or 'full' (default) which estimates the range of correlation and
            accounts for covariance. Will return 'mean' if
            range and sill cannot be estimated.
maxP (float): the maximum possible probability of the model
sill1 (float, None): If known, the sill of the variogram of grid2D,
will be estimated if None and stdtype='full'
range1 (float, None): If known, the range of the variogram of grid2D,
will be estimated if None and stdtype='full'
    Returns:
        dict: Dictionary with keys:
            exp_pop_# where # is the shakethresh
            exp_std_# if stdgrid2D is supplied (stdev of exp_pop_#)
            elim_#, the maximum exposure value possible with the
                applied thresholds and given maxP value
            p_exp_# beta distribution shape factor p (sometimes called alpha)
            q_exp_# beta distribution shape factor q (sometimes called beta)
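
    Example:
        A minimal usage sketch (hypothetical paths; pop_file points to a
        landscan-style population grid)::

            exp = computePexp(model_grid, '/path/to/lspop.flt',
                              probthresh=0.01)  # hypothetical paths
            print(exp['exp_pop_0.00g'], exp['elim_0.00g'])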
"""
model = grid.getData().copy()
mdict = grid.getGeoDict()
    # Figure out difference in resolution between the population file and
    # the model grid
    ptemp, _ = GDALGrid.getFileGeoDict(pop_file)
    factor = ptemp.dx / mdict.dx
# Cut out area from population file
popcut1 = read(
pop_file,
samplegeodict=mdict,
method="nearest",
resample=True,
doPadding=True,
interp_approach="rasterio",
)
# Adjust for upsampling factor to avoid creating new people
popcut1.setData(popcut1.getData() / factor**2)
    popdat = popcut1.getData()
exp_pop = {}
if shakefile is not None:
        if shakethresh < 0.0:
            raise Exception("shaking threshold must be greater than or equal to zero")
# resample shakemap to grid2D
temp = ShakeGrid.load(
shakefile,
samplegeodict=mdict,
resample=True,
doPadding=True,
method="linear",
adjust="res",
)
shk = temp.getLayer(shakethreshtype)
# shk = shk.interpolate2(mdict)
if shk.getGeoDict() != mdict:
raise Exception(
"shakemap was not resampled to exactly the same geodict as the model"
)
shkdat = shk.getData()
model[shkdat < shakethresh] = float("nan")
else:
shakethresh = 0.0
shkdat = None
mu = np.nansum(model[model >= probthresh] * popdat[model >= probthresh])
exp_pop["exp_pop_%1.2fg" % (shakethresh / 100.0,)] = mu
# N = np.nansum([model >= probthresh])
# exp_pop['N_%1.2fg' % (shakethresh/100.,)] = N
elim = np.nansum(popdat[model >= probthresh]) * maxP
# elim = np.nansum(popdat[model >= 0.]) * maxP
exp_pop["elim_%1.2fg" % (shakethresh / 100.0,)] = elim
if stdgrid2D is not None:
std = stdgrid2D.getData().copy()
if shakefile is not None: # Nan out areas where shaking is too low also
std[shkdat < shakethresh] = float("nan")
exp_pop["exp_range"] = None
exp_pop["exp_sill"] = None
if np.nanmax(std) > 0.0 and np.nanmax(model) >= probthresh:
totalmin = np.sqrt(
np.nansum(
(popdat[model >= probthresh] * std[model >= probthresh]) ** 2.0
)
)
totalmax = np.nansum(std[model >= probthresh] * popdat[model >= probthresh])
if stdtype == "full":
if sill1 is None or range1 is None:
                    # Determine correct range to search by width of the area
                    # that has non-nan values
cols = np.where(np.sum(~np.isnan(model), axis=0) > 0.0)
rows = np.where(np.sum(~np.isnan(model), axis=1) > 0.0)
maxlag = int(
np.min(
[
(np.max(cols) - np.min(cols)),
(np.max(rows) - np.min(rows)),
]
)
/ 2.0
)
try2 = (
int(
np.max(
[
(np.max(cols) - np.min(cols)),
(np.max(rows) - np.min(rows)),
]
)
)
+ 100
)
if maxlag < 100:
maxlag = 100
# print('maxlag %d' % maxlag)
range1, sill1 = semivario(
grid.getData().copy(),
probthresh,
shakethresh=shakethresh,
shakegrid=shkdat,
maxlag=maxlag,
)
if range1 is not None: # If None, not enough points, skip ahead
# If sill not found, expand maxlag
if maxlag - range1 < 1.0:
print("no sill found, expanding maxlag to %d" % try2)
range1, sill1 = semivario(
grid.getData().copy(),
probthresh,
shakethresh=shakethresh,
shakegrid=shkdat,
maxlag=try2,
)
if range1 is not None:
if try2 - range1 < 1.0:
print(
"No sill found in semivariogram even after "
"expanding maxlag, assuming max uncertainty"
)
range1 = None
sill1 = None
exp_pop["exp_range"] = range1
exp_pop["exp_sill"] = sill1
                if range1 is None:
                    # Use max because no sill was found
                    exp_pop["exp_std_%1.2fg" % (shakethresh / 100.0,)] = totalmax
else:
                    # Zero out std at cells where the model probability was
                    # below the threshold because we aren't including those
                    # cells in exp_pop
stdz = std.copy()
stdz[model < probthresh] = 0.0
svar1 = svar(stdz, range1, sill1, scale=popdat)
exp_pop["exp_std_%1.2fg" % (shakethresh / 100.0,)] = np.sqrt(svar1)
# exp_pop['exp_range_%1.2fg' % (shakethresh/100.,)] = range1
# exp_pop['exp_sill_%1.2fg' % (shakethresh/100.,)] = sill1
elif stdtype == "max":
exp_pop["exp_std_%1.2fg" % (shakethresh / 100.0,)] = totalmax
elif stdtype == "min":
exp_pop["exp_std_%1.2fg" % (shakethresh / 100.0,)] = totalmin
else:
exp_pop["exp_std_%1.2fg" % (shakethresh / 100.0,)] = (
totalmax + totalmin
) / 2.0
# Beta distribution shape factors
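            # Method-of-moments beta fit scaled to [0, elim]; see the
            # derivation in computeHagg above.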
var = exp_pop["exp_std_%1.2fg" % (shakethresh / 100.0,)] ** 2.0
pe = (mu / elim) * ((elim * mu - mu**2) / var - 1)
qe = (1 - mu / elim) * ((elim * mu - mu**2) / var - 1)
exp_pop["p_exp_%1.2fg" % (shakethresh / 100.0,)] = pe
exp_pop["q_exp_%1.2fg" % (shakethresh / 100.0,)] = qe
# Compute 1 and 2 std ranges
if pe > 0.0 and qe > 0.0:
exp_pop[
"pop_1std_range_%1.2fg" % (shakethresh / 100.0,)
] = get_rangebeta(pe, qe, prob=0.6827, minlim=0.0, maxlim=elim)
exp_pop[
"pop_2std_range_%1.2fg" % (shakethresh / 100.0,)
] = get_rangebeta(pe, qe, prob=0.9545, minlim=0.0, maxlim=elim)
else:
exp_pop["pop_1std_range_%1.2fg" % (shakethresh / 100.0,)] = None
exp_pop["pop_2std_range_%1.2fg" % (shakethresh / 100.0,)] = None
else:
print("no std values above zero, filling with zeros")
exp_pop["exp_std_%1.2fg" % (shakethresh / 100.0,)] = 0.0
exp_pop["p_exp_%1.2fg" % (shakethresh / 100.0,)] = 0.0
exp_pop["q_exp_%1.2fg" % (shakethresh / 100.0,)] = 0.0
exp_pop["pop_1std_range_%1.2fg" % (shakethresh / 100.0,)] = None
exp_pop["pop_2std_range_%1.2fg" % (shakethresh / 100.0,)] = None
    else:
        print("No uncertainty provided, filling with zeros")
exp_pop["exp_std_%1.2fg" % (shakethresh / 100.0,)] = 0.0
exp_pop["p_exp_%1.2fg" % (shakethresh / 100.0,)] = 0.0
exp_pop["q_exp_%1.2fg" % (shakethresh / 100.0,)] = 0.0
exp_pop["pop_1std_range_%1.2fg" % (shakethresh / 100.0,)] = None
exp_pop["pop_2std_range_%1.2fg" % (shakethresh / 100.0,)] = None
return exp_pop


def semivario(
model,
threshold=0.0,
maxlag=100,
npts=1000,
ndists=200,
nvbins=20,
makeplots=False,
shakegrid=None,
shakethresh=0.0,
minpts=50,
):
"""
    Quickly estimate a semivariogram by selecting seed points and then
    computing the semivariogram between each of those points and ndists
    random locations around it that are within maxlag of the seed point.
    Will result in npts x ndists total distance pairs. Uses a spherical
    model.

    Args:
        model (array): array of raster to estimate semivariogram for
        threshold (float): probability threshold; cells below this value
            are excluded from seed point selection
        maxlag (int): maximum lag distance to consider, in pixels
        npts (int): number of seed points to sample from
        ndists (int): number of points to sample at random distances from each
            seed point
        nvbins (int): number of semivariogram bins
        minpts (float): minimum number of samples above threshold required
        makeplots (bool): create semivariogram plots
        shakegrid (array): array of shaking that is the same size as model
        shakethresh (float): Shaking threshold for seed point selection

    Returns:
        tuple: (range, sill) of the fitted spherical model, in pixels and
        semivariance units, or (None, None) if there are not enough points
        above the thresholds
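
    Example:
        A sketch on a synthetic random field (no shaking mask; white noise
        has an essentially flat variogram, so the fitted range is small)::

            rng = np.random.RandomState(1)
            field = rng.rand(400, 400)
            range_px, sill = semivario(field, threshold=0.0, maxlag=100)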
"""
model = model.copy()
if threshold is None:
threshold = 0.0
if shakegrid is None or shakethresh == 0.0:
shakegrid = np.zeros(np.shape(model))
if np.shape(shakegrid) != np.shape(model):
raise Exception("Shakegrid is not the same shape as the model")
# prepare data
nrows, ncols = np.shape(model)
rows = matlib.repmat(np.arange(nrows), ncols, 1).T
cols = matlib.repmat(np.arange(ncols), nrows, 1)
values = model.flatten()
shkvals = shakegrid.flatten()
rowsf = rows.flatten()
colsf = cols.flatten()
# Select npts seed points
indx = np.where((values >= threshold) & (shkvals >= shakethresh))[0]
if len(indx) < minpts:
print(
"Not enough values above thresholds in model. "
"Returning empty range and sill results"
)
return None, None
np.random.seed(47) # Always use same seed so results are repeatable
# just select all points if there aren't npts above the threshold
picks = np.random.choice(len(indx), size=np.min((npts, len(indx))), replace=False)
seedpts = indx[picks]
# Get lags and differences for seed point vs. ndists other points around it
# TODO vectorize this to remove loop
lags = np.array([])
diffs = np.array([])
# vals = np.array([])
seednums = range(len(seedpts))
seednums2 = reversed(range(len(seedpts)))
for seed, seednum1, seednum2 in zip(seedpts, seednums, seednums2):
row1 = rowsf[seed]
col1 = colsf[seed]
np.random.seed(seednum1)
addr = np.random.randint(
np.max((-maxlag, -row1)), np.min((maxlag, nrows - row1)), size=ndists
)
np.random.seed(seednum2)
addc = np.random.randint(
np.max((-maxlag, -col1)), np.min((maxlag, ncols - col1)), size=ndists
)
indr = row1 + addr
indc = col1 + addc
newvalues = model[indr, indc]
ptval = model[row1, col1]
dists = np.sqrt(addr**2 + addc**2) # distance in pixels
# Remove any invalid samples
dists = dists[np.isfinite(newvalues)]
newvalues = newvalues[np.isfinite(newvalues)]
difvals = np.abs(newvalues - ptval)
diffs = np.hstack((diffs, difvals))
lags = np.hstack((lags, dists))
# vals = np.hstack((vals, newvalues, ptval))
diffs2 = diffs**2
    # Make variogram out of these samples
# binedges = np.linspace(0, maxlag, num=nvbins + 1, endpoint=True)
binedges = np.logspace(0, np.log10(maxlag), num=nvbins + 1, endpoint=True)
binmid = (binedges[:-1] + binedges[1:]) / 2
subs = np.zeros(nvbins)
N = np.zeros(nvbins)
for b in range(nvbins):
inrange = diffs2[(lags > binedges[b]) & (lags <= binedges[b + 1])]
subs[b] = np.nansum(inrange)
N[b] = len(inrange)
semiv = 1.0 / (2 * N) * subs
# Fit model using weighting by 1/N to weigh bins with more samples higher
popt, pcov = curve_fit(
spherical,
binmid[np.isfinite(semiv)],
semiv[np.isfinite(semiv)],
sigma=1.0 / N[np.isfinite(semiv)],
absolute_sigma=False,
bounds=(0, [maxlag, 1.0]),
)
range2, sill2 = popt
if makeplots:
plt.figure()
plt.plot(binmid, semiv, "ob")
plt.xlabel("Lags (pixels)")
plt.ylabel("Semivariance")
plt.plot(binmid, spherical(binmid, *popt), "-b")
# plt.figure()
# corr = (sill2 - spherical(binmid, range2, sill2)) / sill2
# plt.plot(binmid, corr, 'ob')
# plt.xlabel('Lags (pixels)')
# plt.ylabel('Correlation')
return range2, sill2


def spherical(lag, range1, sill):
"""
Spherical variogram model assuming nugget = 0
Args:
lag: float or array of lags as # of pixels/cells
range1 (float): range of spherical model
sill (float): sill of spherical model
Returns:
semivariance as float or array, depending on type(lag)
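
    Example:
        For range1=10.0 and sill=2.0, the model rises from 0 at zero lag to
        the sill at and beyond the range::

            >>> spherical(0.0, 10.0, 2.0)
            0.0
            >>> spherical(10.0, 10.0, 2.0)
            2.0
            >>> spherical(25.0, 10.0, 2.0)
            2.0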
"""
    nugget = 0.0
    out = nugget + sill * ((1.5 * (lag / range1)) - (0.5 * ((lag / range1) ** 3.0)))
if isinstance(out, float):
if lag > range1:
out = nugget + sill
else:
out[lag > range1] = nugget + sill
return out


def svar(stds, range1, sill1, scale=1.0):
"""
    Estimate the variance of an aggregate statistic using the correlation
    implied by the semivariogram and the std values of each pair of cells
    that are within range of each other. The pairwise sum is computed
    quickly by creating a kernel of the correlations and convolving it with
    the image, then multiplying by std, so that the result equals the sum of
    std1*std2*corr*scale1*scale2 over all valid cell pairs
Args:
stds (array): grid of standard deviation of model
range1 (float): range of empirical variogram used to estimate
correlation model
sill1 (float): sill of empirical variogram used to estimate
correlation model
scale: float or array same size as std, factor to multiply by
(area or population) and include in convolution
Returns:
variance of aggregate statistic
"""
range5 = int(range1)
    # Prepare a kernel sized to the range of the spherical model
nrows = 2 * range5 + 1
ncols = 2 * range5 + 1
# get distance in row and col from center point
rows = matlib.repmat(np.arange(nrows), ncols, 1).T - range5
cols = matlib.repmat(np.arange(ncols), nrows, 1) - range5
dists = np.sqrt(rows**2 + cols**2)
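    # The target quantity is
    #     var = sum_i sum_j std_i * std_j * corr(d_ij) * scale_i * scale_j
    # Convolving (std * scale) with the correlation kernel computes the inner
    # sum over j for every cell i in one pass; multiplying by (std * scale)
    # again and summing over i completes the double sum.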
    # Convert from semivariance to correlation and build the kernel
    kernel = (sill1 - spherical(dists, range1, sill1)) / sill1
    # Convolve with the stds, equivalent to the sum of corr * std at each
    # pixel within range1
    # Replace all nans with zeros so fft convolve can be used
    stdzeros = stds.copy()
    stdzeros[np.isnan(stds)] = 0.0
    if not isinstance(scale, float):
        # Use np.where to avoid mutating the caller's array in place
        scale = np.where(np.isnan(scale), 0.0, scale)
    stdzeros *= scale
    out = convolve(stdzeros, kernel, mode="same")
# multiply by stds and scale again
full1 = scale * out * stds
# add up
var2 = np.nansum(full1)
return var2


def get_rangebeta(p, q, prob=0.95, minlim=0, maxlim=1):
"""
Get endpoints of the range of the specified beta function that contain
prob percent of the distribution
Args:
p (float):
p shape factor of beta distribution (a in scipy)
q (float):
q shape factor of beta distribution (b in scipy)
prob (float):
central probability of distribution to return the range
of. Value from 0 to 1
minlim (float):
minimum possible value of distribution
maxlim (float):
maximum possible value of distribution
Returns: tuple (valmin, valmax) where:
* valmin (float): lower edge of range containing prob
* valmax (float): upper edge of range containing prob
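
    Example:
        A quick sketch: the central 68.27% range of a beta(2, 5)
        distribution rescaled to [0, 100]::

            valmin, valmax = get_rangebeta(2.0, 5.0, prob=0.6827,
                                           minlim=0.0, maxlim=100.0)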
"""
loc = minlim
scale = maxlim - loc
valmin, valmax = beta.interval(prob, p, q, loc=loc, scale=scale)
return valmin, valmax


def get_pdfbeta(p, q, binedges, minlim=0, maxlim=1, npts=1000, openends=True):
"""
Return discretized pdf for plotting curve and report probabilities of
each bin
Args:
p (float): p shape factor of beta distribution (a in scipy)
q (float): q shape factor of beta distribution (b in scipy)
binedges (list): list of bin edges
minlim (float): minimum possible value of distribution
maxlim (float): maximum possible value of distribution
npts (int): number of points to return in xvals
openends (bool): assumes lower and upper bins don't have hard edges
Returns: tuple of (xvals, yvals, probs) where:
* xvals: list of log-distributed values
        * yvals: corresponding list of pdf values
* probs (list): list of len(binedges)-1 that gives probability of
value falling in the corresponding bin
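
    Example:
        A usage sketch with log-spaced bin edges (the lowest edge must be
        positive because xvals are log-distributed)::

            binedges = [0.1, 1.0, 10.0, 100.0]
            xvals, yvals, probs = get_pdfbeta(2.0, 5.0, binedges,
                                              minlim=0.0, maxlim=100.0)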
"""
loc = minlim
scale = maxlim - loc
xvals = np.logspace(np.log10(np.min(binedges)), np.log10(maxlim), npts)
yvals = beta.pdf(xvals, p, q, loc=loc, scale=scale)
probs = np.empty(len(binedges) - 1)
bincop = np.copy(binedges)
if openends:
bincop[0] = -np.inf
bincop[-1] = np.inf
for i in range(len(bincop) - 1):
min1 = beta.cdf(bincop[i], p, q, loc=loc, scale=scale)
max1 = beta.cdf(bincop[i + 1], p, q, loc=loc, scale=scale)
probs[i] = max1 - min1
return xvals, yvals, probs