Hdf5
A simple custom class to create, access, and destroy an HDF5 file.
import pandas as pd
import numpy as np
import h5py
import os
from pandas import HDFStore
from datetime import datetime,timedelta,date
from dataclasses import dataclass
from typing import Any
from collections import defaultdict
class MyHDF5:
    """Thin convenience wrapper around ``pandas.HDFStore``.

    The wrapper opens (or creates) ``<name>.h5`` and exposes helpers to
    insert, read, append to and remove datasets, plus dict-like access
    (``store[key]``, ``key in store``, ``len(store)``, ``del store[key]``).

    Attributes:
        current_path: Working directory captured when the class body was
            executed. Kept for backward compatibility only; it is no
            longer used to locate the file.
        cursor: The underlying ``pandas.HDFStore`` once ``__init__`` ran.
        zip_level: Compression level passed to ``HDFStore`` as ``complevel``.
        hdf5_name: File name of the store (``"<name>.h5"``).
        hdf5_path: Absolute path of the store file, resolved at construction.
    """

    current_path = os.getcwd()
    cursor: Any = None
    zip_level = 0
    hdf5_name = ""
    hdf5_path = ""

    def __init__(self, name: str, path: str = None, compressed_level: int = 0) -> None:
        """Open (or create) the HDF5 file ``<name>.h5``.

        Args:
            name: File name without the ``.h5`` extension.
            path: Directory that holds the file. When omitted, the file is
                resolved against the current working directory.
            compressed_level: Compression level forwarded as ``complevel``;
                ``0`` keeps the class default (``zip_level``).
        """
        self.hdf5_name = f"{name}.h5"
        if compressed_level:
            self.zip_level = compressed_level

        # Build the target path explicitly rather than chdir-ing in and out:
        # changing the process-wide working directory is a global side
        # effect, and the previous implementation then re-opened the store
        # by bare file name, leaking the first handle and ending up with a
        # file in the wrong directory.
        location = os.path.join(path, self.hdf5_name) if path else self.hdf5_name
        # Pin the absolute path so destroy() still hits the right file even
        # if the working directory changes later.
        self.hdf5_path = os.path.abspath(location)

        self.cursor = pd.HDFStore(self.hdf5_path, complevel=self.zip_level)
        print(self.cursor.info())

    def insert_df(self, key, df, timestamp=False):
        """Store ``df`` under ``key``, replacing any existing dataset there.

        Args:
            key: Dataset key inside the store.
            df: Object to store (passed to ``HDFStore.put`` as ``value``).
            timestamp: When truthy, the data is stored under
                ``"<key>/<YYYYmmdd_HHMMSS>"`` using the current local time.

        The insert is best-effort: a failing write is reported on stdout
        instead of being raised, so callers are never interrupted.
        """
        data_location = key
        if timestamp:
            time_label = datetime.now().strftime("%Y%m%d_%H%M%S")
            data_location = f"{key}/{time_label}"

        try:
            print("Inserting...")
            self.cursor.put(key=data_location, value=df)
        except Exception as e:
            # Stay best-effort, but make the failure visible instead of
            # silently discarding it.
            print(f"Insert df failed, key: <<{data_location}>>, error: {e!r}")
            return

        # NOTE(review): get_mem_usage is not defined in the visible part of
        # this file; presumably it is a module-level helper defined
        # elsewhere. Size reporting is cosmetic, so a problem here must not
        # hide the fact that the write itself succeeded.
        try:
            size = get_mem_usage(df)
        except Exception as e:
            size = f"unknown ({e!r})"
        print(f"Insert df success, key: <<{data_location}>>, dataset size: {size}")

    def extract_data(self, key):
        """Return the object stored under ``key``."""
        return self.cursor[key]

    def remove_data(self, key):
        """Delete the dataset stored under ``key``.

        Raises:
            KeyError: If ``key`` is not present in the store.
        """
        try:
            del self.cursor[key]
        except KeyError as err:
            raise KeyError(f"{key} not found") from err
        print(f"Remove dataset {key} success")

    def append_df(self, key, df: pd.DataFrame):
        """Append the rows of ``df`` to the dataset stored under ``key``.

        The existing dataset is read, concatenated with ``df`` and written
        back as a whole. When ``key`` does not exist yet, ``df`` is stored
        as the initial dataset instead of raising ``KeyError``.
        """
        try:
            existing = self.cursor[key]
        except KeyError:
            self.cursor[key] = df
            return
        self.cursor[key] = pd.concat([existing, df])

    def __repr__(self) -> str:
        """Return the store summary produced by ``HDFStore.info()``."""
        return self.cursor.info()

    def is_open(self) -> bool:
        """Return whether the underlying store is currently open."""
        return self.cursor.is_open

    @property
    def keys(self) -> list:
        """Sorted list of the keys present in the store."""
        return sorted(self.cursor.keys())

    def __getitem__(self, key: str) -> Any:
        """Return the object stored under ``key``."""
        return self.cursor[key]

    def __setitem__(self, key, value) -> None:
        """Store ``value`` under ``key``."""
        self.cursor[key] = value

    def __delitem__(self, key) -> None:
        """Delete the dataset stored under ``key``."""
        del self.cursor[key]

    def __contains__(self, key: str) -> bool:
        """Check whether ``key`` exists in the store.

        Matches either the exact node pathname or the pathname without its
        leading ``'/'``.
        """
        node = self.cursor.get_node(key)
        if node is None:
            return False
        name = node._v_pathname
        return name == key or name[1:] == key

    def __len__(self) -> int:
        """Return the number of groups reported by the store."""
        return len(self.cursor.groups())

    def items(self):
        """Iterate over ``(pathname, group)`` pairs of the store's groups."""
        for g in self.cursor.groups():
            yield g._v_pathname, g

    def close(self) -> None:
        """Close the underlying store; errors are printed, not raised."""
        try:
            self.cursor.close()
            print("Close success")
        except Exception as e:
            print(e)

    def check_groups(self) -> list:
        """Return the groups reported by the underlying store."""
        return self.cursor.groups()

    def destroy(self) -> None:
        """Close the store if needed and delete its file from disk.

        Raises:
            OSError: If the file cannot be removed (e.g. it does not exist).
        """
        if self.is_open():
            print("Close connection...")
            self.close()
        print(f"Destroy << {self.hdf5_name}>>... ")
        # Prefer the absolute path pinned in __init__; the bare file name is
        # only a fallback for instances that never went through __init__.
        os.remove(self.hdf5_path or self.hdf5_name)
        print("Finish")
Last updated