# Source code for plasmapy.utils.data.downloader

"""
Contains functionality for downloading files from a URL. Intended for
downloading files from |PlasmaPy's data repository|.

"""

from pathlib import Path
from urllib.parse import urljoin

import requests

# Public names exported by this module.
__all__ = ["get_file"]

# Note: GitHub links have a problem where the Content-Encoding is set to
# 'gzip' but the file is not actually compressed. This header is just ignored
# by the get_file function.
#
# The trailing slash matters: get_file combines this base with the file
# name using urljoin, which would otherwise replace the final path
# segment ("main") instead of appending to it.
_BASE_URL = "https://raw.githubusercontent.com/PlasmaPy/PlasmaPy-data/main/"

# TODO: use a config file variable to allow users to set a location
# for the data download folder?



# [docs]  (presumably a link marker left over from the rendered docs page)
def get_file(basename, base_url=_BASE_URL, directory=None):
    r"""
    Download a file from a URL (if the file does not already exist) and
    return the full local path to the file.

    Parameters
    ----------
    basename : str
        Name of the file to be downloaded (extension included).

    base_url : str, optional
        The base URL of the file to be downloaded. Defaults to the root
        directory of |PlasmaPy's data repository|. The URL is combined
        with ``basename`` using `urllib.parse.urljoin`, so it should
        end with a trailing slash.

    directory : str or `~pathlib.Path`, optional
        The full path to the desired download location. Defaults to
        :file:`~/.plasmapy/downloads/`\ , which is created (including
        any missing parent directories) if it does not already exist.
        A user-supplied directory is not created and must already
        exist.

    Returns
    -------
    path : `~pathlib.Path`
        The full local path to the downloaded file.

    Raises
    ------
    ValueError
        If ``basename`` does not contain a ``"."`` and therefore has no
        file extension.

    OSError
        If the server answers with a 404 status code.

    `requests.RequestException`
        If the request times out, or if the server answers with any
        other 4xx/5xx status code. This exception type is itself a
        subclass of `OSError`.
    """

    if "." not in str(basename):
        raise ValueError(f"'filename' ({basename}) must include an extension.")

    if directory is None:
        directory = Path.home() / ".plasmapy" / "downloads"

        # parents=True: on a fresh machine ~/.plasmapy does not exist
        # either, so a bare mkdir() would raise FileNotFoundError.
        # exist_ok=True: avoids a check-then-create race when several
        # processes request data at the same time.
        directory.mkdir(parents=True, exist_ok=True)

    path = Path(directory, basename)

    # A previously downloaded copy is reused as-is.
    if path.is_file():
        return path

    url = urljoin(base_url, basename)

    # The timeout bounds the connection attempt and each wait for data
    # (not the total transfer time), so a stalled server cannot hang the
    # caller forever.
    reply = requests.get(url, timeout=30)

    if reply.status_code == 404:
        raise OSError(
            "The requested URL returned a 404 code, which "
            "likely indicates that the file does not exist at the "
            "URL provided."
        )

    # Reject every other error status too; otherwise the body of an
    # error page would be written to disk and then be mistaken for a
    # valid cached download on all later calls.
    reply.raise_for_status()

    with path.open(mode="wb") as f:
        f.write(reply.content)

    return path