# Source code for plasmapy.utils.data.downloader

"""
Contains functionality for downloading files from a URL. Intended for
downloading files from |PlasmaPy's data repository|.

"""

from pathlib import Path
from urllib.parse import urljoin

import requests

__all__ = ["get_file"]

# Note: GitHub links have a problem where the Content-Encoding is set to
# 'gzip' but the file is not actually compressed. This header is just ignored
# by the get_file function.
_BASE_URL = "https://raw.githubusercontent.com/PlasmaPy/PlasmaPy-data/main/"

# TODO: use a config file variable to allow users to set a location
# for the data download folder?


def get_file(basename, base_url=_BASE_URL, directory=None):
    r"""
    Download a file from a URL (if the file does not already exist) and
    return the full local path to the file.

    Parameters
    ----------
    basename : str
        Name of the file to be downloaded (extension included).

    base_url : str, optional
        The base URL of the file to be downloaded. Defaults to the root
        directory of |PlasmaPy's data repository|.

    directory : str or `~pathlib.Path`, optional
        The full path to the desired download location. Defaults to
        :file:`~/.plasmapy/downloads/`\ . The directory, including any
        missing parent directories, is created if a download is needed.

    Returns
    -------
    path : `~pathlib.Path`
        The full local path to the downloaded file.

    Raises
    ------
    ValueError
        If ``basename`` does not include a file extension.

    OSError
        If the server responds with a 404 status code. Any other HTTP
        error status raises ``requests.HTTPError``, which is itself a
        subclass of `OSError`.
    """
    if "." not in str(basename):
        raise ValueError(f"'filename' ({basename}) must include an extension.")

    if directory is None:
        directory = Path(Path.home(), ".plasmapy", "downloads")
    else:
        # Accept plain strings as documented; the rest of the function
        # relies on Path methods.
        directory = Path(directory)

    path = directory / basename

    # A file that is already present locally is never downloaded again.
    if path.is_file():
        return path

    # parents=True: on first use neither ~/.plasmapy nor its downloads
    # subdirectory exists yet. exist_ok=True: tolerate another process
    # creating the directory between the check above and this call.
    directory.mkdir(parents=True, exist_ok=True)

    url = urljoin(base_url, basename)
    reply = requests.get(url)  # noqa: S113

    if reply.status_code == 404:
        raise OSError(
            "The requested URL returned a 404 code, which "
            "likely indicates that the file does not exist at the "
            "URL provided."
        )

    # Fail on every other HTTP error as well, so an error page is never
    # written to disk and later mistaken for a valid cached download.
    reply.raise_for_status()

    with path.open(mode="wb") as f:
        f.write(reply.content)

    return path