Coverage for lst_auto_rta/utils/hdf5.py: 86%
18 statements
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-17 14:47 +0000
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-17 14:47 +0000
1import time
2from pathlib import Path
3from typing import Any, Literal
5import pandas as pd
6import tables
9def pd_read_hdf_with_retry(
10 path_or_buf: str | Path | pd.HDFStore,
11 key: str | Any | None = None,
12 mode: Literal["r", "r+", "a"] = "r",
13 nb_tries: int = 20,
14 retry_wait_time_s: float = 0.5,
15 retry_on_os_error: bool = False,
16 *args,
17 **kwargs,
18) -> pd.DataFrame:
19 """Read a table in a hdf5 object into a pandas DataFrame
21 This is a wrapper around `pandas.read_hdf`, that will try to open the file several time in case a
22 HDF5 "Resource temporary unavailable" error is raised, which happens if the file is already opened
23 in write mode by another process.
24 Optionally, it can also re-try to open the file if an OSError is raised. This can be useful with shared
25 file systems: when the file system is overburdened, the metadata may take to long to be loaded and an
26 existing file can be reported as not present.
28 Parameters
29 ----------
30 path_or_buf : Path
31 Any valid string path is acceptable. Only supports the local file system, remote URLs and file-like objects
32 are not supported.
33 If you want to pass in a path object, pandas accepts any os.PathLike.
34 Alternatively, pandas accepts an open pandas.HDFStore object.
35 key : str | None, optional
36 The group identifier in the store. Can be omitted if the HDF file contains a single pandas object, by
37 default None
38 mode : str, optional
39 Mode to use when opening the file. Ignored if path_or_buf is a pandas.HDFStore. Default is 'r'.
40 nb_tries : int, optional
41 Number of times to attempt opening the file, by default 20
42 retry_wait_time_s : float, optional
43 Amount of time to wait, in seconds, between each opening attempt, by default 0.5.
44 retry_on_os_error : bool, optional
45 If True, the retry strategy will be used as well if opening the files fails with an OSError.
46 If False, only the tables.exceptions.HDF5ExtError (raised when ressource is unavailable) are caught.
47 By default False.
49 Returns
50 -------
51 pd.DataFrame
52 A DataFrame containing the table content.
53 """
54 for _ in range(nb_tries - 1):
55 try:
56 # Don't return right away or the else clause will execute !
57 df = pd.read_hdf(path_or_buf, key=key, mode=mode, *args, **kwargs)
58 break
59 except tables.exceptions.HDF5ExtError:
60 time.sleep(retry_wait_time_s)
61 except OSError:
62 if retry_on_os_error: 62 ↛ 65line 62 didn't jump to line 65 because the condition on line 62 was always true
63 time.sleep(retry_wait_time_s)
64 else:
65 raise
66 else:
67 # try 1 last time, let error raise if failing again
68 df = pd.read_hdf(path_or_buf, key=key, mode=mode, *args, **kwargs)
70 return df