My Note / Zeliang YAO
  • Zeliang's Note
  • Dremio
    • Custom Class
  • 💕Python
    • Design Pattern
      • Creational
        • Abstract Factory
        • Factory Method
        • Singleton
        • Builder / Director
      • Structural
        • Adapter
    • Boto3
    • Typing
    • String
    • Requests
    • Iterator & Iterable
      • Consuming iterator manually
      • Lazy Iterable
    • Generators
    • itertools
    • Collections
    • Customization
      • Customize built-in
      • Logging
      • Hdf5
      • Sqlite3 & Df
    • Pandas
      • Basic
      • Data cleaning
      • Merge, Join, Concat
      • Useful tricks
      • Simple model
      • Pandas acceleration
    • Pandas time series
      • Date Range
      • Datetime Index
      • Holidays
      • Function_to_date_time
      • Period
      • Time zone
    • *args and **kwargs
    • Context Manager
    • Lambda
    • SHA
    • Multithreading
      • Threading
      • Speed Up
    • Email
    • Improvement
    • Useful functions
    • Python OOP
      • Basic
      • @static / @class method
      • attrs module
      • Dataclasses
      • Dataclasses example
      • Others
    • Design patterns
      • Creational Patterns
      • Structural Patterns
      • Behavioral Patterns
  • 🐣Git/Github
    • Commands
  • K8s
    • Useful commands
  • Linux
    • Chmod
Powered by GitBook
On this page

Was this helpful?

  1. Python
  2. Customization

Hdf5

A simple custom class to create, access, and destroy an HDF5 file.


import pandas as pd
import numpy as np
import h5py
import os
from pandas import HDFStore
from datetime import datetime,timedelta,date
from dataclasses import dataclass
from typing import Any
from collections import defaultdict



class MyHDF5:
    """Convenience wrapper around a ``pandas.HDFStore`` file.

    The store is opened (and created if needed) on construction and is
    reachable through ``self.cursor``. The wrapper offers dict-style access
    (``obj[key]``, ``obj[key] = df``, ``del obj[key]``, ``key in obj``,
    ``len(obj)``) plus a few helpers to insert, append, remove and destroy.
    It can also be used as a context manager, which closes the store on exit.
    """

    # Working directory captured once, when the class body is executed.
    current_path = os.getcwd()
    # Underlying pandas.HDFStore; assigned per instance in __init__.
    cursor: Any = None
    # Compression level forwarded to HDFStore as ``complevel``.
    zip_level = 0
    # File name of the store ("<name>.h5").
    hdf5_name = ""
    # Absolute path of the store file; empty until __init__ has run.
    file_path = ""

    def __init__(self, name: str, path: str = None, compressed_level: int = 0) -> None:
        """Open (or create) the store ``<name>.h5``.

        Args:
            name: File name without the ``.h5`` extension.
            path: Directory holding the file. When omitted, the file is
                resolved against the working directory at call time.
            compressed_level: Forwarded to ``pd.HDFStore`` as ``complevel``
                when truthy; otherwise the class default ``zip_level`` is used.
        """
        self.hdf5_name = f"{name}.h5"
        if compressed_level:
            self.zip_level = compressed_level

        # Build the target path explicitly instead of chdir-ing back and
        # forth, and open exactly one handle (the original opened a second
        # store in the working directory and leaked the first one).
        target = os.path.join(path, self.hdf5_name) if path else self.hdf5_name
        # Pin to an absolute path so destroy() still finds the file if the
        # process changes directory later.
        self.file_path = os.path.abspath(target)
        self.cursor = pd.HDFStore(self.file_path, complevel=self.zip_level)
        print(self.cursor.info())

    def __enter__(self) -> "MyHDF5":
        """Return the wrapper itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the store when leaving a ``with`` block."""
        self.close()

    @staticmethod
    def _mem_usage(df) -> str:
        """Return the deep memory usage of *df* as a string in MB."""
        usage = df.memory_usage(deep=True)
        # DataFrame.memory_usage returns a per-column Series, while
        # Series.memory_usage returns a plain number.
        total = usage.sum() if hasattr(usage, "sum") else usage
        return f"{total / 1024 ** 2:.2f} MB"

    def insert_df(self, key, df, timestamp=False):
        """Store *df* under *key* via ``HDFStore.put``.

        Args:
            key: Location of the dataset inside the store.
            df: Object to store.
            timestamp: When truthy, the data is stored under
                ``<key>/<YYYYmmdd_HHMMSS>`` using the current local time.

        Failures are reported on stdout and are not raised, so the call
        stays best-effort as before.
        """
        data_location = key
        if timestamp:
            time_label = datetime.now().strftime("%Y%m%d_%H%M%S")
            data_location = f"{key}/{time_label}"

        print("Inserting...")
        try:
            self.cursor.put(key=data_location, value=df)
        except Exception as e:
            # Report the failure instead of silently discarding it.
            print(f"Insert df failed, key: <<{data_location}>>, error: {e}")
            return
        print(
            f"Insert df success, key: <<{data_location}>>, "
            f"dataset size: {self._mem_usage(df)}"
        )

    def extract_data(self, key):
        """Return the object stored under *key*."""
        return self.cursor[key]

    def remove_data(self, key):
        """Delete the dataset stored under *key*.

        Raises:
            KeyError: If *key* is not present in the store.
        """
        try:
            del self.cursor[key]
        except KeyError:
            # The store's own message adds nothing here; drop the chained
            # context to keep the traceback short.
            raise KeyError(f"{key} not found") from None
        print(f"Remove dataset {key} success")

    def append_df(self, key, df: pd.DataFrame):
        """Concatenate *df* to the data under *key* and store the result.

        If nothing is stored under *key* yet, *df* itself becomes the dataset.
        """
        try:
            existing = self.cursor[key]
        except KeyError:
            # First write for this key: nothing to concatenate with.
            self.cursor[key] = df
            return
        self.cursor[key] = pd.concat([existing, df])

    def __repr__(self) -> str:
        """Return the store's ``info()`` summary."""
        return self.cursor.info()

    def is_open(self) -> bool:
        """Return the store's ``is_open`` flag."""
        return self.cursor.is_open

    @property
    def keys(self) -> list:
        """Sorted list of the keys reported by the store."""
        return sorted(self.cursor.keys())

    def __getitem__(self, key: str) -> Any:
        """Return the object stored under *key*."""
        return self.cursor[key]

    def __setitem__(self, key, value) -> None:
        """Store *value* under *key*."""
        self.cursor[key] = value

    def __delitem__(self, key) -> None:
        """Delete the dataset stored under *key*."""
        del self.cursor[key]

    def __contains__(self, key: str) -> bool:
        """Check whether *key* exists in the store.

        Matches either the exact node path or the path without its
        leading '/'.
        """
        node = self.cursor.get_node(key)
        if node is None:
            return False
        name = node._v_pathname
        return name == key or name[1:] == key

    def __len__(self) -> int:
        """Return the number of groups in the store."""
        return len(self.cursor.groups())

    def items(self):
        """Yield ``(pathname, group)`` pairs for every group in the store."""
        for g in self.cursor.groups():
            yield g._v_pathname, g

    def close(self) -> None:
        """Close the store, printing (not raising) any error."""
        try:
            self.cursor.close()
            print("Close success")
        except Exception as e:
            print(e)

    def check_groups(self) -> list:
        """Return the groups reported by the store."""
        return self.cursor.groups()

    def destroy(self):
        """Close the store if it is open, then delete its file from disk."""
        if self.is_open():
            print("Close connection...")
            self.close()

        print(f"Destroy << {self.hdf5_name}>>... ")
        # Remove the file that was actually opened; fall back to the bare
        # file name for instances that never went through __init__.
        os.remove(self.file_path or self.hdf5_name)
        print("Finish")

Previous: Logging | Next: Sqlite3 & Df

Last updated 3 years ago

Was this helpful?

💕
Page cover image