Source code for floodestimation.analysis

# -*- coding: utf-8 -*-

# Copyright (c) 2014-2015 Florenz A.P. Hollebrandse <f.a.p.hollebrandse@protonmail.ch>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
Module containing flood estimation analysis methods, including QMED, growth curves etc.
"""
from math import log, exp, sqrt, floor, atan
from datetime import date
import copy
import lmoments3 as lm
import lmoments3.distr as lm_distr
import numpy as np
from numpy import linalg
from scipy import optimize


def valid_flows_array(catchment):
    """
    Return array of valid flows (i.e. excluding rejected years etc)

    :param catchment: gauged catchment with amax_records set
    :type catchment: :class:`floodestimation.entities.Catchment`
    :return: 1D array of flow values
    :rtype: :class:`numpy.ndarray`
    """
    return np.array([record.flow for record in catchment.amax_records if record.flag == 0])


[docs]class Analysis(object): """ Generic analysis object """ def __init__(self, year=None, results_log=None): """ :param year: Year to base analysis on. Default: current year. :type year: float :param results_log: Dict to store intermediate results :type results_log: dict """ self.year = year or date.today().year if results_log is not None: self.results_log = results_log else: self.results_log = {}
[docs]class QmedAnalysis(Analysis): """ Class to undertake QMED analyses. Example: >>> from floodestimation.entities import Catchment, Descriptors >>> from floodestimation.analysis import QmedAnalysis >>> catchment = Catchment("Aberdeen", "River Dee") >>> catchment.descriptors = Descriptors(dtm_area=1, bfihost=0.50, sprhost=50, saar=1000, farl=1, urbext2000=0) >>> QmedAnalysis(catchment).qmed_all_methods() {'descriptors': 0.5908579150223052, 'pot_records': None, 'channel_width': None, 'descriptors_1999': 0.2671386414098229, 'area': 1.172, 'amax_records': None} """ # : Methods available to estimate QMED, in order of best/preferred method methods = ('amax_records', 'pot_records', 'descriptors', 'descriptors_1999', 'area', 'channel_width') def __init__(self, catchment, gauged_catchments=None, year=None, results_log=None): """ :param catchment: subject catchment :type catchment: :class:`.entities.Catchment` :param gauged_catchments: catchment collections objects for retrieval of gauged data for donor analyses :type gauged_catchments: :class:`.collections.CatchmentCollections` """ Analysis.__init__(self, year, results_log) self.catchment = catchment self.gauged_catchments = gauged_catchments
[docs] def qmed(self, method='best', **method_options): """ Return QMED estimate using best available methodology depending on what catchment attributes are available. The preferred/best order of methods is defined by :attr:`qmed_methods`. Alternatively, a method can be supplied e.g. `method='descriptors_1999'` to force the use of a particular method. ================= ======================= ====================================================================== `method` `method_options` notes ================= ======================= ====================================================================== `amax_records` n/a Simple median of annual maximum flow records using `Catchment.amax_records`. `pot_records` n/a Uses peaks-over-threshold (POT) flow records. Suitable for flow records shorter than 14 years. `descriptors` Synonym for `method=descriptors2008`. `descriptors2008` `as_rural=False` FEH 2008 regression methodology using `Catchment.descriptors`. Setting `donor_catchments=None` `as_rural=True` returns rural estimate and setting `donor_catchments` to a specific list of :class:`Catchment` object **overrides** automatic selection of the most suitable donor catchment. An empty list forces no donors to be used at all. `descriptors1999` as_rural=False FEH 1999 regression methodology. `area` n/a Simplified FEH 1999 regression methodology using `Cachment.descriptors.dtm_area` only. `channel_width` n/a Emperical regression method using the river channel width only. ================= ======================= ====================================================================== :param method: methodology to use to estimate QMED. Default: automatically choose best method. :type method: str :param method_options: any optional parameters for the QMED method function, e.g. `as_rural=True` :type method_options: kwargs :return: QMED in m³/s :rtype: float """ if method == 'best': # Rules for gauged catchments if self.catchment.pot_dataset: if self.catchment.amax_records: if self.catchment.record_length <= self.catchment.pot_dataset.record_length < 14: use_method = 'pot_records' elif self.catchment.record_length >= 2: use_method = 'amax_records' else: use_method = None elif self.catchment.pot_dataset.record_length >= 1: use_method = 'pot_records' else: use_method = None elif self.catchment.record_length >= 2: use_method = 'amax_records' else: use_method = None # None of the gauged methods will work if use_method: self.results_log['method'] = use_method return getattr(self, '_qmed_from_' + use_method)() # Ungauged methods for method in self.methods[1:]: try: # Return the first method that works self.results_log['method'] = method return getattr(self, '_qmed_from_' + method)(**method_options) except (TypeError, InsufficientDataError): pass # In case none of them worked return None else: # A specific method has been requested try: self.results_log['method'] = method return getattr(self, '_qmed_from_' + method)(**method_options) except AttributeError: raise AttributeError("Method `{}` to estimate QMED does not exist.".format(method))
[docs] def qmed_all_methods(self): """ Returns a dict of QMED methods using all available methods. Available methods are defined in :attr:`qmed_methods`. The returned dict keys contain the method name, e.g. `amax_record` with value representing the corresponding QMED estimate in m³/s. :return: dict of QMED estimates :rtype: dict """ result = {} for method in self.methods: try: result[method] = getattr(self, '_qmed_from_' + method)() except: result[method] = None return result
def _qmed_from_amax_records(self): """ Return QMED estimate based on annual maximum flow records. :return: QMED in m³/s :rtype: float """ valid_flows = valid_flows_array(self.catchment) n = len(valid_flows) if n < 2: raise InsufficientDataError("Insufficient annual maximum flow records available for catchment {}." .format(self.catchment.id)) return np.median(valid_flows) def _qmed_from_pot_records(self): """ Return QMED estimate based on peaks-over-threshold (POT) records. Methodology source: FEH, Vol. 3, pp. 77-78 :return: QMED in m³/s :rtype: float """ pot_dataset = self.catchment.pot_dataset if not pot_dataset: raise InsufficientDataError("POT dataset must be set for catchment {} to estimate QMED from POT data." .format(self.catchment.id)) complete_year_records, length = self._complete_pot_years(pot_dataset) if length < 1: raise InsufficientDataError("Insufficient POT flow records available for catchment {}." .format(self.catchment.id)) position = 0.790715789 * length + 0.539684211 i = floor(position) w = 1 + i - position # This is equivalent to table 12.1! flows = [record.flow for record in complete_year_records] flows.sort(reverse=True) return w * flows[i - 1] + (1 - w) * flows[i] def _pot_month_counts(self, pot_dataset): """ Return a list of 12 sets. Each sets contains the years included in the POT record period. :param pot_dataset: POT dataset (records and meta data) :type pot_dataset: :class:`floodestimation.entities.PotDataset` """ periods = pot_dataset.continuous_periods() result = [set() for x in range(12)] for period in periods: year = period.start_date.year month = period.start_date.month while True: # Month by month, add the year result[month - 1].add(year) # If at end of period, break loop if year == period.end_date.year and month == period.end_date.month: break # Next month (and year if needed) month += 1 if month == 13: month = 1 year += 1 return result def _complete_pot_years(self, pot_dataset): """ Return a tuple of a list of :class:`PotRecord`s filtering out part-complete years; and the number of complete years. This method creates complete years by ensuring there is an equal number of each month in the entire record, taking into account data gaps. A month is considered to be covered by the record if there is at least a single day of the month in any continuous period. (There doesn't have to be a record!) "Leftover" months not needed are left at the beginning of the record, i.e. recent records are prioritised over older records. :param pot_dataset: POT dataset (records and meta data) :type pot_dataset: :class:`floodestimation.entities.PotDataset` :return: list of POT records :rtype: list of :class:`floodestimation.entities.PotRecord` """ # Create a list of 12 sets, one for each month. Each set represents the years for which the POT record includes # a particular month. month_counts = self._pot_month_counts(pot_dataset) # Number of complete years n = min([len(month) for month in month_counts]) # Use the last available years only such that there are equal numbers of each month; i.e. part years are # excluded at the beginning of the record years_to_use = [sorted(month)[-n:] for month in month_counts] return ( [record for record in pot_dataset.pot_records if record.date.year in years_to_use[record.date.month - 1]], n ) def _area_exponent(self): """ Methodology source: FEH, Vol. 3, p. 14 """ return 1 - 0.015 * log(2 * self.catchment.descriptors.dtm_area) def _residual_soil(self): """ Methodology source: FEH, Vol. 3, p. 14 """ return self.catchment.descriptors.bfihost \ + 1.3 * (0.01 * self.catchment.descriptors.sprhost) \ - 0.987 def _qmed_from_channel_width(self): """ Return QMED estimate based on watercourse channel width. Methodology source: FEH, Vol. 3, p. 25 :return: QMED in m³/s :rtype: float """ try: return 0.182 * self.catchment.channel_width ** 1.98 except TypeError: raise InsufficientDataError("Catchment `channel_width` attribute must be set first.") def _qmed_from_area(self): """ Return QMED estimate based on catchment area. TODO: add source of method :return: QMED in m³/s :rtype: float """ try: return 1.172 * self.catchment.descriptors.dtm_area ** self._area_exponent() # Area in km² except (TypeError, KeyError): raise InsufficientDataError("Catchment `descriptors` attribute must be set first.") def _qmed_from_descriptors_1999(self, as_rural=False): """ Return QMED estimation based on FEH catchment descriptors, 1999 methodology. Methodology source: FEH, Vol. 3, p. 14 :param as_rural: assume catchment is fully rural. Default: false. :type as_rural: bool :return: QMED in m³/s :rtype: float """ try: qmed_rural = 1.172 * self.catchment.descriptors.dtm_area ** self._area_exponent() \ * (self.catchment.descriptors.saar / 1000.0) ** 1.560 \ * self.catchment.descriptors.farl ** 2.642 \ * (self.catchment.descriptors.sprhost / 100.0) ** 1.211 * \ 0.0198 ** self._residual_soil() if as_rural: return qmed_rural else: return qmed_rural * self.urban_adj_factor() except (TypeError, KeyError): raise InsufficientDataError("Catchment `descriptors` attribute must be set first.") def _qmed_from_descriptors(self, **method_options): """ Alias for current method to estimated QMED from catchment descriptors, currently: `descriptors_2008` :param method_options: Options to passed to actual QMED method function :type method_options: kwargs :return: QMED in m³/s :rtype: float """ return self._qmed_from_descriptors_2008(**method_options) def _qmed_from_descriptors_2008(self, as_rural=False, donor_catchments=None): """ Return QMED estimation based on FEH catchment descriptors, 2008 methodology. Methodology source: Science Report SC050050, p. 36 :param as_rural: assume catchment is fully rural. Default: false. :type as rural: bool :param donor_catchments: override donor catchment to improve QMED catchment. If `None` (default), donor catchment will be searched automatically, if empty list, no donors will be used. :type donor_catchments: :class:`Catchment` :return: QMED in m³/s :rtype: float """ try: # Basis rural QMED from descriptors lnqmed_rural = 2.1170 \ + 0.8510 * log(self.catchment.descriptors.dtm_area) \ - 1.8734 * 1000 / self.catchment.descriptors.saar \ + 3.4451 * log(self.catchment.descriptors.farl) \ - 3.0800 * self.catchment.descriptors.bfihost ** 2.0 qmed_rural = exp(lnqmed_rural) # Log intermediate results self.results_log['qmed_descr_rural'] = qmed_rural if donor_catchments is None: # If no donor catchments are provided, find the nearest 25 donor_catchments = self.find_donor_catchments() if donor_catchments: # If found multiply rural estimate with weighted adjustment factors from all donors weights = self._vec_alpha(donor_catchments) errors = self._vec_lnqmed_residuals(donor_catchments) correction = np.dot(weights, errors) lnqmed_rural += correction qmed_rural = exp(lnqmed_rural) # Log intermediate results self.results_log['donors'] = donor_catchments for i, donor in enumerate(self.results_log['donors']): donor.weight = weights[i] donor.factor = exp(errors[i]) self.results_log['donor_adj_factor'] = exp(correction) self.results_log['qmed_adj_rural'] = qmed_rural if as_rural: return qmed_rural else: # Apply urbanisation adjustment urban_adj_factor = self.urban_adj_factor() # Log intermediate results self.results_log['qmed_descr_urban'] = self.results_log['qmed_descr_rural'] * urban_adj_factor return qmed_rural * urban_adj_factor except (TypeError, KeyError): raise InsufficientDataError("Catchment `descriptors` attribute must be set first.") def _pruaf(self): """ Return percentage runoff urban adjustment factor. Methodology source: eqn. 6, Kjeldsen 2010 """ return 1 + 0.47 * self.catchment.descriptors.urbext(self.year) \ * self.catchment.descriptors.bfihost / (1 - self.catchment.descriptors.bfihost)
[docs] def urban_adj_factor(self): """ Return urban adjustment factor (UAF) used to adjust QMED and growth curves. Methodology source: eqn. 8, Kjeldsen 2010 :return: urban adjustment factor :rtype: float """ urbext = self.catchment.descriptors.urbext(self.year) result = self._pruaf() ** 2.16 * (1 + urbext) ** 0.37 self.results_log['urban_extent'] = urbext self.results_log['urban_adj_factor'] = result return result
@staticmethod def _dist_corr(dist, phi1, phi2, phi3): """ Generic distance-decaying correlation function :param dist: Distance between catchment centrolds in km :type dist: float :param phi1: Decay function parameters 1 :type phi1: float :param phi2: Decay function parameters 2 :type phi2: float :param phi3: Decay function parameters 3 :type phi3: float :return: Correlation coefficient, r :rtype: float """ return phi1 * exp(-phi2 * dist) + (1 - phi1) * exp(-phi3 * dist) def _model_error_corr(self, catchment1, catchment2): """ Return model error correlation between subject catchment and other catchment. Methodology source: Kjeldsen & Jones, 2009, table 3 :param catchment1: catchment to calculate error correlation with :type catchment1: :class:`Catchment` :param catchment2: catchment to calculate error correlation with :type catchment2: :class:`Catchment` :return: correlation coefficient, r :rtype: float """ dist = catchment1.distance_to(catchment2) return self._dist_corr(dist, 0.3998, 0.0283, 0.9494) def _lnqmed_corr(self, catchment1, catchment2): """ Return model error correlation between subject catchment and other catchment. Methodology source: Kjeldsen & Jones, 2009, fig 3 :param catchment1: catchment to calculate correlation with :type catchment1: :class:`Catchment` :param catchment2: catchment to calculate correlation with :type catchment2: :class:`Catchment` :return: correlation coefficient, r :rtype: float """ dist = catchment1.distance_to(catchment2) return self._dist_corr(dist, 0.2791, 0.0039, 0.0632) def _vec_b(self, donor_catchments): """ Return vector ``b`` of model error covariances to estimate weights Methodology source: Kjeldsen, Jones and Morris, 2009, eqs 3 and 10 :param donor_catchments: Catchments to use as donors :type donor_catchments: list of :class:`Catchment` :return: Model error covariance vector :rtype: :class:`numpy.ndarray` """ p = len(donor_catchments) b = 0.1175 * np.ones(p) for i in range(p): b[i] *= self._model_error_corr(self.catchment, donor_catchments[i]) return b @staticmethod def _beta(catchment): """ Return beta, the GLO scale parameter divided by loc parameter estimated using simple regression model Methodology source: Kjeldsen & Jones, 2009, table 2 :param catchment: Catchment to estimate beta for :type catchment: :class:`Catchment` :return: beta :rtype: float """ lnbeta = -1.1221 \ - 0.0816 * log(catchment.descriptors.dtm_area) \ - 0.4580 * log(catchment.descriptors.saar / 1000) \ + 0.1065 * log(catchment.descriptors.bfihost) return exp(lnbeta) def _matrix_sigma_eta(self, donor_catchments): """ Return model error coveriance matrix Sigma eta Methodology source: Kjelsen, Jones & Morris 2014, eqs 2 and 3 :param donor_catchments: Catchments to use as donors :type donor_catchments: list of :class:`Catchment` :return: 2-Dimensional, symmetric covariance matrix :rtype: :class:`numpy.ndarray` """ p = len(donor_catchments) sigma = 0.1175 * np.ones((p, p)) for i in range(p): for j in range(p): if i != j: sigma[i, j] *= self._model_error_corr(donor_catchments[i], donor_catchments[j]) return sigma def _matrix_sigma_eps(self, donor_catchments): """ Return sampling error coveriance matrix Sigma eta Methodology source: Kjeldsen & Jones 2009, eq 9 :param donor_catchments: Catchments to use as donors :type donor_catchments: list of :class:`Catchment` :return: 2-Dimensional, symmetric covariance matrix :rtype: :class:`numpy.ndarray` """ p = len(donor_catchments) sigma = np.empty((p, p)) for i in range(p): beta_i = self._beta(donor_catchments[i]) n_i = donor_catchments[i].amax_records_end() - donor_catchments[i].amax_records_start() + 1 for j in range(p): beta_j = self._beta(donor_catchments[j]) n_j = donor_catchments[j].amax_records_end() - donor_catchments[j].amax_records_start() + 1 rho_ij = self._lnqmed_corr(donor_catchments[i], donor_catchments[j]) n_ij = min(donor_catchments[i].amax_records_end(), donor_catchments[j].amax_records_end()) - \ max(donor_catchments[i].amax_records_start(), donor_catchments[j].amax_records_start()) + 1 sigma[i, j] = 4 * beta_i * beta_j * n_ij / n_i / n_j * rho_ij return sigma def _matrix_omega(self, donor_catchments): return self._matrix_sigma_eta(donor_catchments) + self._matrix_sigma_eps(donor_catchments) def _vec_alpha(self, donor_catchments): """ Return vector alpha which is the weights for donor model errors Methodology source: Kjeldsen, Jones & Morris 2014, eq 10 :param donor_catchments: Catchments to use as donors :type donor_catchments: list of :class:`Catchment` :return: Vector of donor weights :rtype: :class:`numpy.ndarray` """ return np.dot(linalg.inv(self._matrix_omega(donor_catchments)), self._vec_b(donor_catchments)) @staticmethod def _lnqmed_residual(catchment): """ Return ln(QMED) model error at a gauged catchment :param catchment: Gauged catchment :type catchment: :class:`Catchment` :return: Model error :rtype: float """ analysis = QmedAnalysis(catchment, year=2000) # Probably should set the year to the midpoint of amax rec. logmedian_amax = log(analysis.qmed(method='amax_records')) logmedian_descr = log(analysis.qmed(method='descriptors')) return logmedian_amax - logmedian_descr def _vec_lnqmed_residuals(self, catchments): """ Return ln(QMED) model errors for a list of catchments :param catchments: List of gauged catchments :type catchments: list of :class:`Catchment` :return: Model errors :rtype: list of float """ result = np.empty(len(catchments)) for index, donor in enumerate(catchments): result[index] = self._lnqmed_residual(donor) return result
[docs] def find_donor_catchments(self, limit=6, dist_limit=500): """ Return a suitable donor catchment to improve a QMED estimate based on catchment descriptors alone. :param limit: maximum number of catchments to return. Default: 6. Set to `None` to return all available catchments. :type limit: int :param dist_limit: maximum distance in km. between subject and donor catchment. Default: 500 km. Increasing the maximum distance will increase computation time! :type dist_limit: float or int :return: list of nearby catchments :rtype: :class:`floodestimation.entities.Catchment` """ if self.gauged_catchments: return self.gauged_catchments.nearest_qmed_catchments(self.catchment, limit, dist_limit) else: return []
[docs]class GrowthCurveAnalysis(Analysis): """ Class to undertake a growth curve analysis. """ #: Methods available to estimate the growth curve methods = ('enhanced_single_site', 'single_site', 'pooling_group') #: Available distribution functions for growth curves distributions = ('glo', 'gev') def __init__(self, catchment, gauged_catchments=None, year=None, results_log=None): """ :param catchment: subject catchment :type catchment: :class:`.entities.Catchment` :param gauged_catchments: catchment collections objects for retrieval of gauged data for donor analyses :type gauged_catchments: :class:`.collections.CatchmentCollections` """ Analysis.__init__(self, year, results_log) self.catchment = catchment self.gauged_cachments = gauged_catchments #: List of donor catchments. Either set manually or by calling #: :meth:`.GrowthCurveAnalysis.find_donor_catchments` or implicitly when calling :meth:`.growth_curve()`. self.donor_catchments = []
[docs] def growth_curve(self, method='best', **method_options): """ Return QMED estimate using best available methodology depending on what catchment attributes are available. ====================== ====================== ================================================================== `method` `method_options` notes ====================== ====================== ================================================================== `enhanced_single_site` `distr='glo'` Preferred method for gauged catchments (i.e. with `as_rural=False` `Catchment.amax_record`). `single_site` `distr='glo'` Alternative method for gauged catchments. Uses AMAX data from subject station only. `pooling_group` `distr='glo'` Only possible method for ungauged catchments. `as_rural=False` ====================== ====================== ================================================================== :param method: methodology to use to estimate the growth curve. Default: automatically choose best method. :type method: str :param method_options: any optional parameters for the growth curve method function :type method_options: kwargs :return: Inverse cumulative distribution function, callable class with one parameter `aep` (annual exceedance probability) :type: :class:`.GrowthCurve` """ if method == 'best': if self.catchment.amax_records: # Gauged catchment, use enhanced single site self.results_log['method'] = 'enhanced_single_site' return self._growth_curve_enhanced_single_site() else: # Ungauged catchment, standard pooling group self.results_log['method'] = 'pooling_group' return self._growth_curve_pooling_group() else: try: self.results_log['method'] = 'method' return getattr(self, '_growth_curve_' + method)(**method_options) except AttributeError: raise AttributeError("Method `{}` to estimate the growth curve does not exist.".format(method))
@staticmethod def _dimensionless_flows(catchment): """ Return array of flows devided by QMED :param catchment: gauged catchment with amax_records set :type catchment: :class:`floodestimation.entities.Catchment` :return: 1D array of flow values devided by QMED :rtype: :class:`numpy.ndarray` """ flows = valid_flows_array(catchment) return flows / np.median(flows) def _var_and_skew(self, catchments, as_rural=False): """ Calculate L-CV and L-SKEW from a single catchment or a pooled group of catchments. Methodology source: Science Report SC050050, para. 6.4.1-6.4.2 """ if not hasattr(catchments, '__getitem__'): # In case of a single catchment l_cv, l_skew = self._l_cv_and_skew(self.catchment) self.results_log['donors'] = [] else: # Prepare arrays for donor L-CVs and L-SKEWs and their weights n = len(catchments) l_cvs = np.empty(n) l_skews = np.empty(n) l_cv_weights = np.empty(n) l_skew_weights = np.empty(n) # Fill arrays for index, donor in enumerate(catchments): l_cvs[index], l_skews[index] = self._l_cv_and_skew(donor) l_cv_weights[index] = self._l_cv_weight(donor) l_skew_weights[index] = self._l_skew_weight(donor) # Weighted averages of L-CV and l-SKEW l_cv_weights /= sum(l_cv_weights) # Weights sum to 1 # Special case if the first donor is the subject catchment itself, assumed if similarity distance == 0. if self._similarity_distance(self.catchment, catchments[0]) == 0: l_cv_weights *= self._l_cv_weight_factor() # Reduce weights of all donor catchments l_cv_weights[0] += 1 - sum(l_cv_weights) # But increase the weight of the subject catchment l_cv_rural = sum(l_cv_weights * l_cvs) l_skew_weights /= sum(l_skew_weights) # Weights sum to 1 l_skew_rural = sum(l_skew_weights * l_skews) self.results_log['l_cv_rural'] = l_cv_rural self.results_log['l_skew_rural'] = l_skew_rural if as_rural: l_cv = l_cv_rural l_skew = l_skew_rural else: # Method source: eqns. 10 and 11, Kjeldsen 2010 l_cv = l_cv_rural * 0.5547 ** self.catchment.descriptors.urbext(self.year) l_skew = (l_skew_rural + 1) * 1.1545 ** self.catchment.descriptors.urbext(self.year) - 1 # Record intermediate results (donors) self.results_log['donors'] = catchments total_record_length = 0 for index, donor in enumerate(self.results_log['donors']): donor.l_cv = l_cvs[index] donor.l_cv_weight = l_cv_weights[index] donor.l_skew = l_skews[index] donor.l_skew_weight = l_skew_weights[index] total_record_length += donor.record_length self.results_log['donors_record_length'] = total_record_length # Record intermediate results self.results_log['l_cv'] = l_cv self.results_log['l_skew'] = l_skew return l_cv, l_skew def _l_cv_and_skew(self, catchment): """ Calculate L-CV and L-SKEW for a gauged catchment. Uses `lmoments3` library. Methodology source: Science Report SC050050, para. 6.7.5 """ z = self._dimensionless_flows(catchment) l1, l2, t3 = lm.lmom_ratios(z, nmom=3) return l2 / l1, t3 def _l_cv_weight(self, donor_catchment): """ Return L-CV weighting for a donor catchment. Methodology source: Science Report SC050050, eqn. 6.18 and 6.22a """ try: dist = donor_catchment.similarity_dist except AttributeError: dist = self._similarity_distance(self.catchment, donor_catchment) b = 0.0047 * sqrt(dist) + 0.0023 / 2 c = 0.02609 / (donor_catchment.record_length - 1) return 1 / (b + c) def _l_cv_weight_factor(self): """ Return multiplier for L-CV weightings in case of enhanced single site analysis. Methodology source: Science Report SC050050, eqn. 6.15a and 6.15b """ b = 0.0047 * sqrt(0) + 0.0023 / 2 c = 0.02609 / (self.catchment.record_length - 1) return c / (b + c) def _l_skew_weight(self, donor_catchment): """ Return L-SKEW weighting for donor catchment. Methodology source: Science Report SC050050, eqn. 6.19 and 6.22b """ try: dist = donor_catchment.similarity_dist except AttributeError: dist = self._similarity_distance(self.catchment, donor_catchment) b = 0.0219 * (1 - exp(-dist / 0.2360)) c = 0.2743 / (donor_catchment.record_length - 2) return 1 / (b + c) def _growth_curve_single_site(self, distr='glo'): """ Return flood growth curve function based on `amax_records` from the subject catchment only. :return: Inverse cumulative distribution function with one parameter `aep` (annual exceedance probability) :type: :class:`.GrowthCurve` """ if self.catchment.amax_records: self.donor_catchments = [] return GrowthCurve(distr, *self._var_and_skew(self.catchment)) else: raise InsufficientDataError("Catchment's `amax_records` must be set for a single site analysis.") def _growth_curve_pooling_group(self, distr='glo', as_rural=False): """ Return flood growth curve function based on `amax_records` from a pooling group. :return: Inverse cumulative distribution function with one parameter `aep` (annual exceedance probability) :type: :class:`.GrowthCurve` :param as_rural: assume catchment is fully rural. Default: false. :type as rural: bool """ if not self.donor_catchments: self.find_donor_catchments() gc = GrowthCurve(distr, *self._var_and_skew(self.donor_catchments)) # Record intermediate results self.results_log['distr_name'] = distr.upper() self.results_log['distr_params'] = gc.params return gc def _growth_curve_enhanced_single_site(self, distr='glo', as_rural=False): """ Return flood growth curve function based on `amax_records` from the subject catchment and a pooling group. :return: Inverse cumulative distribution function with one parameter `aep` (annual exceedance probability) :type: :class:`.GrowthCurve` :param as_rural: assume catchment is fully rural. Default: false. :type as rural: bool """ if not self.donor_catchments: self.find_donor_catchments(include_subject_catchment='force') gc = GrowthCurve(distr, *self._var_and_skew(self.donor_catchments)) # Record intermediate results self.results_log['distr_name'] = distr.upper() self.results_log['distr_params'] = gc.params return gc #: Dict of weighting factors and standard deviation for catchment descriptors to use in calculating the similarity #: distance measure between the subject catchment and each donor catchment. The dict is structured like this: #: `{parameter: (weight, standard deviation, transform method)}`. The transform method is optional and is typically #: omitted or set to `log`. # TODO: calculate standard deviation from database similarity_params = {'dtm_area': (3.2, 1.28, log), # param: (weight, std_dev, transform method) 'saar': (0.5, 0.37, log), 'farl': (0.1, 0.05), 'fpext': (0.2, 0.04)} def _similarity_distance(self, subject_catchment, other_catchment): dist_sq = 0 for param, value in self.similarity_params.items(): try: weight = value[0] std_dev = value[1] try: transform = value[2] # A method! except IndexError: # If no transform method, just use linear transform = lambda x: x dist_sq += weight * ((transform(getattr(other_catchment.descriptors, param)) - transform(getattr(subject_catchment.descriptors, param))) / std_dev) ** 2 except TypeError: # If either of the catchments do not have the descriptor, assume infinitely large distance dist_sq += float('inf') return sqrt(dist_sq)
[docs] def find_donor_catchments(self, include_subject_catchment='auto'): """ Find list of suitable donor cachments, ranked by hydrological similarity distance measure. This method is implicitly called when calling the :meth:`.growth_curve` method unless the attribute :attr:`.donor_catchments` is set manually. The results are stored in :attr:`.donor_catchments`. The (list of) :class:`floodestimation.entities.Catchment` will have an additional attribute :attr:`similarity_dist`. :param include_subject_catchment: - `auto`: include subject catchment if suitable for pooling and if urbext2000 < 0.03 - `force`: always include subject catchment - `exclude`: do not include the subject catchment :type include_subject_catchment: str """ # Only if we have access to db with gauged catchment data if self.gauged_cachments: self.donor_catchments = self.gauged_cachments. \ most_similar_catchments(subject_catchment=self.catchment, similarity_dist_function=lambda c1, c2: self._similarity_distance(c1, c2), include_subject_catchment=include_subject_catchment) else: self.donor_catchments = []
[docs]class GrowthCurve(): """ Growth curve constructed using **sample** L-VAR and L-SKEW. The `GrowthCurve` class is callable, i.e. it can be used as a function. It has one parameter `aep`, which is an annual exceedance probability and can be either a single value or a list of values. In the latter case, a numpy :class:`ndarray` is returned. Example: >>> from floodestimation.analysis import GrowthCurve >>> growth_curve = GrowthCurve(distr='glo', var=0.2, skew=-0.1, kurtosis=0.185) >>> growth_curve(aep=0.5) 1.0 >>> growth_curve(aep=[0.1, 0.01, 0.001]) array([ 1.38805928, 1.72475593, 1.98119739]) >>> growth_curve.params [1.0, 0.1967263286166932, 0.1] >>> growth_curve.distr_kurtosis 0.175 >>> growth_curve.kurtosis_fit() 0.010000000000000009 """ def __init__(self, distr, var, skew, kurtosis=None): #: Statistical distribution function abbreviation, e.g. 'glo', 'gev'. Any supported by the :mod:`lmoments3` #: package can be used. self.distr = distr #: Sample L-variance (t2) self.var = var #: Sample L-skew (t3) self.skew = skew #: Sample L-kurtosis (t4, not used to create distribution function) self.kurtosis = kurtosis try: #: Statistical distribution as scipy `rv_continous` class, extended with L-moment methods. self.distr_f = getattr(lm_distr, distr) #: Distribution function parameter. Except for the `loc` parameter, all other parameters are estimated using #: the sample variance and skew linear moments. self.params = self.distr_f.lmom_fit(lmom_ratios=[1, var, skew]) self.params['loc'] = self._solve_location_param() #: The **distribution's** L-kurtosis which may be different from the **sample** L-kurtosis (t4). self.distr_kurtosis = self.distr_f.lmom_ratios(nmom=4, **self.params)[3] except AttributeError: raise InsufficientDataError("Distribution function `{}` does not exist.".format(distr)) def __call__(self, aep): return self.distr_f.ppf(1 - np.array(aep), **self.params) def _solve_location_param(self): """ We're lazy here and simply iterate to find the location parameter such that growth_curve(0.5)=1. """ params = copy.copy(self.params) del params['loc'] f = lambda location: self.distr_f.ppf(0.5, loc=location, **params) - 1 return optimize.brentq(f, -10, 10)
[docs] def kurtosis_fit(self): """ Estimate the goodness of fit by calculating the difference between sample L-kurtosis and distribution function L-kurtosis. :return: Goodness of fit measure :rtype: float """ try: return self.kurtosis - self.distr_kurtosis except TypeError: raise InsufficientDataError("The (sample) L-kurtosis must be set before the fit can be calculated.")
class InsufficientDataError(BaseException): pass