Source code for lst_auto_rta.utils.hdf5

import time
from pathlib import Path
from typing import Any, Literal

import pandas as pd
import tables


[docs] def pd_read_hdf_with_retry( path_or_buf: str | Path | pd.HDFStore, key: str | Any | None = None, mode: Literal["r", "r+", "a"] = "r", nb_tries: int = 20, retry_wait_time_s: float = 0.5, retry_on_os_error: bool = False, *args, **kwargs, ) -> pd.DataFrame: """Read a table in a hdf5 object into a pandas DataFrame This is a wrapper around `pandas.read_hdf`, that will try to open the file several time in case a HDF5 "Resource temporary unavailable" error is raised, which happens if the file is already opened in write mode by another process. Optionally, it can also re-try to open the file if an OSError is raised. This can be useful with shared file systems: when the file system is overburdened, the metadata may take to long to be loaded and an existing file can be reported as not present. Parameters ---------- path_or_buf : Path Any valid string path is acceptable. Only supports the local file system, remote URLs and file-like objects are not supported. If you want to pass in a path object, pandas accepts any os.PathLike. Alternatively, pandas accepts an open pandas.HDFStore object. key : str | None, optional The group identifier in the store. Can be omitted if the HDF file contains a single pandas object, by default None mode : str, optional Mode to use when opening the file. Ignored if path_or_buf is a pandas.HDFStore. Default is 'r'. nb_tries : int, optional Number of times to attempt opening the file, by default 20 retry_wait_time_s : float, optional Amount of time to wait, in seconds, between each opening attempt, by default 0.5. retry_on_os_error : bool, optional If True, the retry strategy will be used as well if opening the files fails with an OSError. If False, only the tables.exceptions.HDF5ExtError (raised when ressource is unavailable) are caught. By default False. Returns ------- pd.DataFrame A DataFrame containing the table content. """ for _ in range(nb_tries - 1): try: # Don't return right away or the else clause will execute ! df = pd.read_hdf(path_or_buf, key=key, mode=mode, *args, **kwargs) break except tables.exceptions.HDF5ExtError: time.sleep(retry_wait_time_s) except OSError: if retry_on_os_error: time.sleep(retry_wait_time_s) else: raise else: # try 1 last time, let error raise if failing again df = pd.read_hdf(path_or_buf, key=key, mode=mode, *args, **kwargs) return df