Source code for floodestimation.fehdata

# -*- coding: utf-8 -*-

# Copyright (c) 2014  Florenz A.P. Hollebrandse <f.a.p.hollebrandse@protonmail.ch>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
This module provides methods to download a complete set of published gauged catchment data from the `National River Flow
Archive <http://www.ceh.ac.uk/data/nrfa/peakflow_overview.html>`_.

Downloaded data files are stored in a Cache folder under the user's application data folder. On Windows, this is folder
is located at `C:\\\\Users\\\\{Username}\\\\AppData\\\\Local\\\\Open Hydrology\\\\fehdata\\\\Cache`.

A typical data retrieval is as follows:

>>> from floodestimation import fehdata
>>> fehdata.clear_cache()
>>> fehdata.download_data()
>>> fehdata.unzip_data()

Data files can then be accessed as follows:

>>> cd3_files = fehdata.cd3_files()
>>> amax_files = fehdata.amax_files()

For parsing CD3 files and AMAX files see :mod:`floodestimation.parsers`.

"""


from urllib.request import urlopen, pathname2url
from urllib.error import URLError
from datetime import datetime, timedelta
import os
import shutil
import json
from zipfile import ZipFile
from distutils.version import LooseVersion
# Current package imports
from .settings import config


CACHE_FOLDER = config['DEFAULT']['cache_folder']
CACHE_ZIP = 'nrfa_data.zip'


def _retrieve_download_url():
    """
    Retrieves download location for FEH data zip file from hosted json configuration file.

    :return: URL for FEH data file
    :rtype: str
    """
    try:
        # Try to obtain the url from the Open Hydrology json config file.
        with urlopen(config['nrfa']['oh_json_url'], timeout=10) as f:
            remote_config = json.loads(f.read().decode('utf-8'))
        # This is just for testing, assuming a relative local file path starting with ./
        if remote_config['nrfa_url'].startswith('.'):
            remote_config['nrfa_url'] = 'file:' + pathname2url(os.path.abspath(remote_config['nrfa_url']))

        # Save retrieved config data
        _update_nrfa_metadata(remote_config)

        return remote_config['nrfa_url']
    except URLError:
        # If that fails (for whatever reason) use the fallback constant.
        return config['nrfa']['url']


[docs]def update_available(after_days=1):
    """
    Check whether updated NRFA data is available.

    :param after_days: Only check if not checked previously since a certain number of days ago
    :type after_days: float
    :return: `True` if update available, `False` if not, `None` if remote location cannot be reached.
    :rtype: bool or None
    """
    never_downloaded = not bool(config.get('nrfa', 'downloaded_on', fallback=None) or None)
    if never_downloaded:
        config.set_datetime('nrfa', 'update_checked_on', datetime.utcnow())
        config.save()
        return True

    last_checked_on = config.get_datetime('nrfa', 'update_checked_on', fallback=None) or datetime.fromtimestamp(0)
    if datetime.utcnow() < last_checked_on + timedelta(days=after_days):
        return False

    current_version = LooseVersion(config.get('nrfa', 'version', fallback='0') or '0')
    try:
        with urlopen(config['nrfa']['oh_json_url'], timeout=10) as f:
            remote_version = LooseVersion(json.loads(f.read().decode('utf-8'))['nrfa_version'])
        config.set_datetime('nrfa', 'update_checked_on', datetime.utcnow())
        config.save()
        return remote_version > current_version
    except URLError:
        return None


[docs]def download_data():
    """
    Downloads complete station dataset including catchment descriptors and amax records. And saves it into a cache
    folder.
    """
    with urlopen(_retrieve_download_url()) as f:
        with open(os.path.join(CACHE_FOLDER, CACHE_ZIP), "wb") as local_file:
            local_file.write(f.read())


def _update_nrfa_metadata(remote_config):
    """
    Save NRFA metadata to local config file using retrieved config data

    :param remote_config: Downloaded JSON data, not a ConfigParser object!
    """
    config['nrfa']['oh_json_url'] = remote_config['nrfa_oh_json_url']
    config['nrfa']['version'] = remote_config['nrfa_version']
    config['nrfa']['url'] = remote_config['nrfa_url']
    config.set_datetime('nrfa', 'published_on', datetime.utcfromtimestamp(remote_config['nrfa_published_on']))
    config.set_datetime('nrfa', 'downloaded_on', datetime.utcnow())
    config.set_datetime('nrfa', 'update_checked_on', datetime.utcnow())
    config.save()


[docs]def nrfa_metadata():
    """
    Return metadata on the NRFA data.

    Returned metadata is a dict with the following elements:

    - `url`: string with NRFA data download URL
    - `version`: string with NRFA version number, e.g. '3.3.4'
    - `published_on`: datetime of data release/publication (only month and year are accurate, rest should be ignored)
    - `downloaded_on`: datetime of last download

    :return: metadata
    :rtype: dict
    """
    result = {
        'url': config.get('nrfa', 'url', fallback=None) or None,  # Empty strings '' become None
        'version': config.get('nrfa', 'version', fallback=None) or None,
        'published_on': config.get_datetime('nrfa', 'published_on', fallback=None) or None,
        'downloaded_on': config.get_datetime('nrfa', 'downloaded_on', fallback=None) or None
    }
    return result


[docs]def unzip_data():
    """
    Extract all files from downloaded FEH data zip file.
    """
    with ZipFile(os.path.join(CACHE_FOLDER, CACHE_ZIP), 'r') as zf:
        zf.extractall(path=CACHE_FOLDER)


[docs]def clear_cache():
    """
    Delete all files from cache folder.
    """
    shutil.rmtree(CACHE_FOLDER)
    os.makedirs(CACHE_FOLDER)


[docs]def amax_files():
    """
    Return all annual maximum flow (`*.am`) files in cache folder and sub folders.

    :return: List of file paths
    :rtype: list
    """
    return [os.path.join(dp, f) for dp, dn, filenames in os.walk(CACHE_FOLDER)
            for f in filenames if os.path.splitext(f)[1].lower() == '.am']


[docs]def cd3_files():
    """
    Return all catchment descriptor files (`*.cd3`) files in cache folder and sub folders.

    :return: List of file paths
    :rtype: list
    """
    return [os.path.join(dp, f) for dp, dn, filenames in os.walk(CACHE_FOLDER)
            for f in filenames if os.path.splitext(f)[1].lower() == '.cd3']