Module ocean_science_utilities.filecache.filecache
Contents: Simple file caching routines to interact with a file cache.
Copyright (C) 2022 Sofar Ocean Technologies
Authors: Pieter Bart Smit
Functions:
- filepaths()
, given URI's return a filepath to the locally stored
version
- exists()
, does a cache with a given name exists
- create_cache()
, create a cache with a given name and custom properties.
- delete_cache()
, delete files associated with the cache.
- delete_default()
, delete files associated with the default cache.
- delete_files()
, remove entries from a given cache.
- _get_cache
, get Cache object corresponding to the name (for internal use
only)
Expand source code
"""
Contents: Simple file caching routines to interact with a file cache.
Copyright (C) 2022
Sofar Ocean Technologies
Authors: Pieter Bart Smit
======================
Functions:
- `filepaths`, given URI's return a filepath to the locally stored
version
- `exists`, does a cache with a given name exists
- `create_cache`, create a cache with a given name and custom properties.
- `delete_cache`, delete files associated with the cache.
- `delete_default`, delete files associated with the default cache.
- `delete_files`, remove entries from a given cache.
- `_get_cache`, get Cache object corresponding to the name (for internal use
only)
"""
# Import
# =============================================================================
import os
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
from ocean_science_utilities.filecache.cache_object import (
TEMPORARY_DIRECTORY,
CACHE_SIZE_GB,
FileCache,
)
from ocean_science_utilities.filecache.remote_resources import RemoteResource
# Constants
# =============================================================================
DEFAULT_CACHE_NAME = "__default__"
# Private Module Variables
# =============================================================================
# This dictionary contains all instantiated FileCache objects as values and
# the object name as key.
_ACTIVE_FILE_CACHES = {} # type: Dict[str,FileCache]
# Main public function.
# =============================================================================
def set(name, value, cache_name: Optional[str] = None) -> None:
"""
Set cache value.
:name:
:value:
:param cache_name:
:return: None
"""
cache = get_cache(cache_name)
setattr(cache, name, value)
def filepaths(
uris: Union[List[str], str],
cache_name: Optional[str] = None,
) -> Union[List[str], Tuple[List[str], List[bool]]]:
"""
Return the full file path to locally stored objects corresponding to the given URI.
:param uris: List of uris, or a single uri
:param cache_name: name of the cache to use. If None, a default cache will
be initialized automatically (if not initialized) and used.
:param return_cache_hits: return whether or not the files were already in
cache or downloaded from the remote source (cache hit or miss).
:return: List Absolute paths to the locally stored versions corresponding
to the list of URI's. IF return_cache_hits=True, additionally return
a list of cache hits as the second entry of the return tuple.
"""
return get_cache(cache_name)[uris]
def remove_directive_function(directive: str, name: str, cache_name=None) -> None:
"""
EMPTY Doc String.
:directive:
:name:
:cache_name:
:return: None
"""
_ = get_cache(cache_name).remove_directive_function(directive, name)
def set_directive_function(
directive: str,
name: str,
post_process_function: Union[Callable[[str], None], Callable[[str], bool]],
cache_name=None,
) -> None:
"""
EMPTY Doc String.
:directive:
:name:
:post_process_function:
:cache_name:
:return: None
"""
_ = get_cache(cache_name).set_directive_function(
directive, name, post_process_function
)
def exists(cache_name: str) -> bool:
"""
Check if the cache name already exists.
:param cache_name: name for the cache to be created. This name is used
to retrieve files from the cache.
:return: True if exists, False otherwise
"""
return cache_name in _ACTIVE_FILE_CACHES
def create_cache(
cache_name: str,
cache_path: str = TEMPORARY_DIRECTORY,
cache_size_GB: Union[int, float] = CACHE_SIZE_GB,
do_cache_eviction_on_startup: bool = False,
download_in_parallel=True,
resources: Optional[List[RemoteResource]] = None,
) -> None:
"""
Create a file cache. Created caches *must* have unique names and cache_paths.
:param cache_name: name for the cache to be created. This name is used
to retrieve files from the cache.
:param cache_path: path to store cache. If path does not exist it will be
created.
:param cache_size_GB: Maximum size of the cache in GiB. If cache exceeds
the size, then files with oldest access/modified dates get deleted
until everthing fits in the cache again. Fractional values (floats)
are allowed.
:param do_cache_eviction_on_startup: do_cache_eviction_on_startup: whether
we ensure the cache size conforms to the given size on startup.
If set to true, a cache directory that exceeds the maximum size
will be reduced to max size. Set to False by default in which case
an error occurs. The latter to prevent eroneously evicting files
from a cache that was previously created on purpose with a larger
size that the limit.
:param download_in_parallel: Download in paralel from resource. Per default 10
worker threads are created.
:return:
"""
cache_path = os.path.abspath(os.path.expanduser(cache_path))
if cache_name in _ACTIVE_FILE_CACHES:
raise ValueError(f"Cache with name {cache_name} is already initialized")
for key, cache in _ACTIVE_FILE_CACHES.items():
if cache.path == cache_path:
raise ValueError(
f"Error when creating cache with name: "
f'"{cache_name}". \n A cache named: "{key}" '
f"already uses the path {cache_path} "
f"for caching.\n "
f"Multiple caches cannot share the same path."
)
_ACTIVE_FILE_CACHES[cache_name] = FileCache(
cache_path,
size_GB=cache_size_GB,
do_cache_eviction_on_startup=do_cache_eviction_on_startup,
parallel=download_in_parallel,
resources=resources,
)
return
def delete_cache(cache_name):
"""
Delete all files associated with a cache and remove cache from available caches.
To note: all files are deleted, but the folder itself is not.
:param cache_name: Name of the cache to be deleted
:return:
"""
if not exists(cache_name):
raise ValueError(f"Cache with name {cache_name} does not exist")
cache = _ACTIVE_FILE_CACHES.pop(cache_name)
cache.purge()
def delete_default():
"""
Clean up the default cache.
:return:
"""
if exists(DEFAULT_CACHE_NAME):
delete_cache(DEFAULT_CACHE_NAME)
def delete_files(
uris: Union[str, Iterable[str]],
cache_name: Optional[str] = None,
error_if_not_in_cache: bool = True,
) -> None:
"""
Remove given key(s) from the cache.
:param uris: list of keys to remove
:param cache_name: name of initialized cache.
:return:
"""
if not isinstance(uris, Iterable) or isinstance(uris, str):
uris = [uris]
cache = get_cache(cache_name)
for key in uris:
try:
cache.remove(key)
except ValueError as e:
if error_if_not_in_cache:
raise e
def get_cache(cache_name: Optional[str]) -> FileCache:
"""
Get a valid cache object, error if the name does not exist.
:param cache_name: Name of the cache
:return: Cache object
"""
if cache_name is None:
cache_name = DEFAULT_CACHE_NAME
if not exists(cache_name):
if cache_name == DEFAULT_CACHE_NAME:
create_cache(cache_name)
else:
raise ValueError(f"Cache with name {cache_name} does not exist.")
return _ACTIVE_FILE_CACHES[cache_name]
Functions
def create_cache(cache_name: str, cache_path: str = '~/temporary_roguewave_files/filecache/', cache_size_GB: Union[float, int] = 5, do_cache_eviction_on_startup: bool = False, download_in_parallel=True, resources: Optional[List[RemoteResource]] = None) ‑> None
-
Create a file cache. Created caches must have unique names and cache_paths.
:param cache_name: name for the cache to be created. This name is used to retrieve files from the cache. :param cache_path: path to store cache. If path does not exist it will be created. :param cache_size_GB: Maximum size of the cache in GiB. If cache exceeds the size, then files with oldest access/modified dates get deleted until everthing fits in the cache again. Fractional values (floats) are allowed. :param do_cache_eviction_on_startup: do_cache_eviction_on_startup: whether we ensure the cache size conforms to the given size on startup. If set to true, a cache directory that exceeds the maximum size will be reduced to max size. Set to False by default in which case an error occurs. The latter to prevent eroneously evicting files from a cache that was previously created on purpose with a larger size that the limit. :param download_in_parallel: Download in paralel from resource. Per default 10 worker threads are created.
:return:
Expand source code
def create_cache( cache_name: str, cache_path: str = TEMPORARY_DIRECTORY, cache_size_GB: Union[int, float] = CACHE_SIZE_GB, do_cache_eviction_on_startup: bool = False, download_in_parallel=True, resources: Optional[List[RemoteResource]] = None, ) -> None: """ Create a file cache. Created caches *must* have unique names and cache_paths. :param cache_name: name for the cache to be created. This name is used to retrieve files from the cache. :param cache_path: path to store cache. If path does not exist it will be created. :param cache_size_GB: Maximum size of the cache in GiB. If cache exceeds the size, then files with oldest access/modified dates get deleted until everthing fits in the cache again. Fractional values (floats) are allowed. :param do_cache_eviction_on_startup: do_cache_eviction_on_startup: whether we ensure the cache size conforms to the given size on startup. If set to true, a cache directory that exceeds the maximum size will be reduced to max size. Set to False by default in which case an error occurs. The latter to prevent eroneously evicting files from a cache that was previously created on purpose with a larger size that the limit. :param download_in_parallel: Download in paralel from resource. Per default 10 worker threads are created. :return: """ cache_path = os.path.abspath(os.path.expanduser(cache_path)) if cache_name in _ACTIVE_FILE_CACHES: raise ValueError(f"Cache with name {cache_name} is already initialized") for key, cache in _ACTIVE_FILE_CACHES.items(): if cache.path == cache_path: raise ValueError( f"Error when creating cache with name: " f'"{cache_name}". \n A cache named: "{key}" ' f"already uses the path {cache_path} " f"for caching.\n " f"Multiple caches cannot share the same path." ) _ACTIVE_FILE_CACHES[cache_name] = FileCache( cache_path, size_GB=cache_size_GB, do_cache_eviction_on_startup=do_cache_eviction_on_startup, parallel=download_in_parallel, resources=resources, ) return
def delete_cache(cache_name)
-
Delete all files associated with a cache and remove cache from available caches.
To note: all files are deleted, but the folder itself is not.
:param cache_name: Name of the cache to be deleted :return:
Expand source code
def delete_cache(cache_name): """ Delete all files associated with a cache and remove cache from available caches. To note: all files are deleted, but the folder itself is not. :param cache_name: Name of the cache to be deleted :return: """ if not exists(cache_name): raise ValueError(f"Cache with name {cache_name} does not exist") cache = _ACTIVE_FILE_CACHES.pop(cache_name) cache.purge()
def delete_default()
-
Clean up the default cache.
:return:
Expand source code
def delete_default(): """ Clean up the default cache. :return: """ if exists(DEFAULT_CACHE_NAME): delete_cache(DEFAULT_CACHE_NAME)
def delete_files(uris: Union[str, Iterable[str]], cache_name: Optional[str] = None, error_if_not_in_cache: bool = True) ‑> None
-
Remove given key(s) from the cache.
:param uris: list of keys to remove :param cache_name: name of initialized cache. :return:
Expand source code
def delete_files( uris: Union[str, Iterable[str]], cache_name: Optional[str] = None, error_if_not_in_cache: bool = True, ) -> None: """ Remove given key(s) from the cache. :param uris: list of keys to remove :param cache_name: name of initialized cache. :return: """ if not isinstance(uris, Iterable) or isinstance(uris, str): uris = [uris] cache = get_cache(cache_name) for key in uris: try: cache.remove(key) except ValueError as e: if error_if_not_in_cache: raise e
def exists(cache_name: str) ‑> bool
-
Check if the cache name already exists.
:param cache_name: name for the cache to be created. This name is used to retrieve files from the cache. :return: True if exists, False otherwise
Expand source code
def exists(cache_name: str) -> bool: """ Check if the cache name already exists. :param cache_name: name for the cache to be created. This name is used to retrieve files from the cache. :return: True if exists, False otherwise """ return cache_name in _ACTIVE_FILE_CACHES
def filepaths(uris: Union[List[str], str], cache_name: Optional[str] = None) ‑> Union[List[str], Tuple[List[str], List[bool]]]
-
Return the full file path to locally stored objects corresponding to the given URI.
:param uris: List of uris, or a single uri :param cache_name: name of the cache to use. If None, a default cache will be initialized automatically (if not initialized) and used. :param return_cache_hits: return whether or not the files were already in cache or downloaded from the remote source (cache hit or miss).
:return: List Absolute paths to the locally stored versions corresponding to the list of URI's. IF return_cache_hits=True, additionally return a list of cache hits as the second entry of the return tuple.
Expand source code
def filepaths( uris: Union[List[str], str], cache_name: Optional[str] = None, ) -> Union[List[str], Tuple[List[str], List[bool]]]: """ Return the full file path to locally stored objects corresponding to the given URI. :param uris: List of uris, or a single uri :param cache_name: name of the cache to use. If None, a default cache will be initialized automatically (if not initialized) and used. :param return_cache_hits: return whether or not the files were already in cache or downloaded from the remote source (cache hit or miss). :return: List Absolute paths to the locally stored versions corresponding to the list of URI's. IF return_cache_hits=True, additionally return a list of cache hits as the second entry of the return tuple. """ return get_cache(cache_name)[uris]
def get_cache(cache_name: Optional[str]) ‑> FileCache
-
Get a valid cache object, error if the name does not exist.
:param cache_name: Name of the cache :return: Cache object
Expand source code
def get_cache(cache_name: Optional[str]) -> FileCache: """ Get a valid cache object, error if the name does not exist. :param cache_name: Name of the cache :return: Cache object """ if cache_name is None: cache_name = DEFAULT_CACHE_NAME if not exists(cache_name): if cache_name == DEFAULT_CACHE_NAME: create_cache(cache_name) else: raise ValueError(f"Cache with name {cache_name} does not exist.") return _ACTIVE_FILE_CACHES[cache_name]
def remove_directive_function(directive: str, name: str, cache_name=None) ‑> None
-
EMPTY Doc String.
:directive: :name: :cache_name:
:return: None
Expand source code
def remove_directive_function(directive: str, name: str, cache_name=None) -> None: """ EMPTY Doc String. :directive: :name: :cache_name: :return: None """ _ = get_cache(cache_name).remove_directive_function(directive, name)
def set(name, value, cache_name: Optional[str] = None) ‑> None
-
Set cache value.
:name: :value: :param cache_name:
:return: None
Expand source code
def set(name, value, cache_name: Optional[str] = None) -> None: """ Set cache value. :name: :value: :param cache_name: :return: None """ cache = get_cache(cache_name) setattr(cache, name, value)
def set_directive_function(directive: str, name: str, post_process_function: Union[Callable[[str], None], Callable[[str], bool]], cache_name=None) ‑> None
-
EMPTY Doc String.
:directive: :name: :post_process_function: :cache_name:
:return: None
Expand source code
def set_directive_function( directive: str, name: str, post_process_function: Union[Callable[[str], None], Callable[[str], bool]], cache_name=None, ) -> None: """ EMPTY Doc String. :directive: :name: :post_process_function: :cache_name: :return: None """ _ = get_cache(cache_name).set_directive_function( directive, name, post_process_function )