Coverage for lst_auto_rta/utils/hdf5.py: 86%

18 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-03 14:47 +0000

1import time 

2from pathlib import Path 

3from typing import Any, Literal 

4 

5import pandas as pd 

6import tables 

7 

8 

9def pd_read_hdf_with_retry( 

10 path_or_buf: str | Path | pd.HDFStore, 

11 key: str | Any | None = None, 

12 mode: Literal["r", "r+", "a"] = "r", 

13 nb_tries: int = 20, 

14 retry_wait_time_s: float = 0.5, 

15 retry_on_os_error: bool = False, 

16 *args, 

17 **kwargs, 

18) -> pd.DataFrame: 

19 """Read a table in a hdf5 object into a pandas DataFrame 

20 

21 This is a wrapper around `pandas.read_hdf`, that will try to open the file several time in case a 

22 HDF5 "Resource temporary unavailable" error is raised, which happens if the file is already opened 

23 in write mode by another process. 

24 Optionally, it can also re-try to open the file if an OSError is raised. This can be useful with shared 

25 file systems: when the file system is overburdened, the metadata may take to long to be loaded and an 

26 existing file can be reported as not present. 

27 

28 Parameters 

29 ---------- 

30 path_or_buf : Path 

31 Any valid string path is acceptable. Only supports the local file system, remote URLs and file-like objects 

32 are not supported. 

33 If you want to pass in a path object, pandas accepts any os.PathLike. 

34 Alternatively, pandas accepts an open pandas.HDFStore object. 

35 key : str | None, optional 

36 The group identifier in the store. Can be omitted if the HDF file contains a single pandas object, by 

37 default None 

38 mode : str, optional 

39 Mode to use when opening the file. Ignored if path_or_buf is a pandas.HDFStore. Default is 'r'. 

40 nb_tries : int, optional 

41 Number of times to attempt opening the file, by default 20 

42 retry_wait_time_s : float, optional 

43 Amount of time to wait, in seconds, between each opening attempt, by default 0.5. 

44 retry_on_os_error : bool, optional 

45 If True, the retry strategy will be used as well if opening the files fails with an OSError. 

46 If False, only the tables.exceptions.HDF5ExtError (raised when ressource is unavailable) are caught. 

47 By default False. 

48 

49 Returns 

50 ------- 

51 pd.DataFrame 

52 A DataFrame containing the table content. 

53 """ 

54 for _ in range(nb_tries - 1): 

55 try: 

56 # Don't return right away or the else clause will execute ! 

57 df = pd.read_hdf(path_or_buf, key=key, mode=mode, *args, **kwargs) 

58 break 

59 except tables.exceptions.HDF5ExtError: 

60 time.sleep(retry_wait_time_s) 

61 except OSError: 

62 if retry_on_os_error: 62 ↛ 65line 62 didn't jump to line 65 because the condition on line 62 was always true

63 time.sleep(retry_wait_time_s) 

64 else: 

65 raise 

66 else: 

67 # try 1 last time, let error raise if failing again 

68 df = pd.read_hdf(path_or_buf, key=key, mode=mode, *args, **kwargs) 

69 

70 return df