data_management.py
    # Copyright 2022 Valentin Bruch <valentin.bruch@rwth-aachen.de>
    # License: MIT
    """
    Kondo FRTRG, data management module
    
    This file contains functions and classes to manage data generated using the
    kondo module.
    
    General concepts:
    * All metadata are stored in an SQL database.
    * Floquet matrices are stored in HDF5 files.
    * Each HDF5 file can contain multiple data points. Data points can be added to
      HDF5 files.
    * Each HDF5 file contains a table of metadata for the data points stored in
      this file.
    * Data points are identified in HDF5 files by a hash generated from their full
      Floquet matrices at the end of the RG flow.
    * The SQL database stores the directory, filename, and hash where the Floquet
      matrices are stored.
    
    Implementation:
    * pandas for accessing the SQL database and managing the full table of metadata
    * pytables for HDF5 files
    * a file "filename.lock" is temporarily created when writing to a HDF5 file.
    """
    
    import os
    import tables as tb
    import pathlib
    from time import sleep
    from datetime import datetime
    import numpy as np
    import pandas as pd
    import sqlalchemy as db
    import random
    import warnings
    import settings
    
    # We use hashes as identifiers for data points in HDF5 files. These hashes
    # are often not valid python names, which causes a warning. We ignore this
    # warning.
    warnings.simplefilter("ignore", tb.NaturalNameWarning)
    
    def random_string(length: int):
        """
        Generate a random string of alphanumeric characters with the given length.
        """
        res = ""
        for _ in range(length):
            x = random.randint(0, 61)
            if x < 10:
                res += chr(x + 48)
            elif x < 36:
                res += chr(x + 55)
            else:
                res += chr(x + 61)
        return res
    
    
    def replace_all(string: str, replacements: dict):
        """
        Apply all replacements to a string.
        """
        for old, new in replacements.items():
            string = string.replace(old, new)
        return string
    
    
    class KondoExport:
        """
        Class for saving Kondo object to file.
        Example usage:
        >>> kondo = Kondo(...)
        >>> kondo.run(...)
        >>> KondoExport(kondo).save_h5("data/frtrg-01.h5")
        """
        METHOD_ENUM = tb.Enum(('unknown', 'mu', 'J', 'J-compact-1', 'J-compact-2', 'mu-reference', 'J-reference', 'mu-extrap-voltage', 'J-extrap-voltage'))
        SOLVER_METHOD_ENUM = tb.Enum(('unknown', 'RK45', 'RK23', 'DOP853', 'Radau', 'BDF', 'LSODA', 'other'))
    
        def __init__(self, kondo):
            self.kondo = kondo
    
        @property
        def hash(self):
            """
            hash based on Floquet matrices in Kondo object
            """
            try:
                return self._hash
            except AttributeError:
                self._hash = self.kondo.hash()[:40]
                return self._hash
    
        @property
        def metadata(self):
            """
            dictionary of metadata
            """
            # Determine method
            if self.kondo.unitary_transformation:
                if self.kondo.compact == 2:
                    method = 'J-compact-2'
                elif self.kondo.compact == 1:
                    method = 'J-compact-1'
                else:
                    method = 'J'
            else:
                method = 'mu'
    
            # Collect solver flags
            solver_flags = 0
            try:
                if self.kondo.simplified_initial_conditions:
                    solver_flags |= DataManager.SOLVER_FLAGS["simplified_initial_conditions"]
            except AttributeError:
                pass
            try:
                if self.kondo.include_Ga:
                    solver_flags |= DataManager.SOLVER_FLAGS["include_Ga"]
            except AttributeError:
                pass
            try:
                if self.kondo.solve_integral_exactly:
                    solver_flags |= DataManager.SOLVER_FLAGS["solve_integral_exactly"]
            except AttributeError:
                pass
            try:
                if self.kondo.truncation_order == 2:
                    solver_flags |= DataManager.SOLVER_FLAGS["second_order_rg_equations"]
                elif self.kondo.truncation_order != 3:
                    settings.logger.warning("Invalid truncation order: %s"%self.kondo.truncation_order)
            except AttributeError:
                pass
            for (key, value) in self.kondo.global_settings.items():
                if value:
                    try:
                        solver_flags |= DataManager.SOLVER_FLAGS[key.lower()]
                    except KeyError:
                        pass
    
            version = self.kondo.global_settings["VERSION"]
            return dict(
                    hash = self.hash,
                    omega = self.kondo.omega,
                    energy = self.kondo.energy,
                    version_major = version[0],
                    version_minor = version[1],
                    lazy_inverse_factor = self.kondo.global_settings["LAZY_INVERSE_FACTOR"],
                    git_commit_count = version[2],
                    git_commit_id = version[3],
                    method = method,
                    timestamp = datetime.utcnow().timestamp(),
                    solver_method = getattr(self.kondo, 'solveopts', {}).get('method', 'unknown'),
                    solver_tol_abs = getattr(self.kondo, 'solveopts', {}).get('atol', -1),
                    solver_tol_rel = getattr(self.kondo, 'solveopts', {}).get('rtol', -1),
                    integral_method = getattr(self.kondo, "integral_method", -15),
                    d = self.kondo.d,
                    vdc = self.kondo.vdc,
                    vac = self.kondo.vac,
                    xL = self.kondo.xL,
                    nmax = self.kondo.nmax,
                    padding = self.kondo.padding,
                    voltage_branches = self.kondo.voltage_branches,
                    resonant_dc_shift = self.kondo.resonant_dc_shift,
                    solver_flags = solver_flags,
                    )
    
        @property
        def main_results(self):
            """
            dictionary of main results: DC current, DC conductance, AC current (absolute value and phase)
            """
            results = dict(
                    gamma = np.nan,
                    dc_current = np.nan,
                    dc_conductance = np.nan,
                    ac_current_abs = np.nan,
                    ac_current_phase = np.nan
                    )
            nmax = self.kondo.nmax
            vb = self.kondo.voltage_branches
            if self.kondo.compact == 0:
                try:
                    results['gamma'] = self.kondo.gamma[vb, nmax, nmax].real
                except:
                    pass
                try:
                    results['dc_current'] = self.kondo.gammaL[nmax, nmax].real
                except:
                    pass
                try:
                    results['dc_conductance'] = self.kondo.deltaGammaL[nmax, nmax].real
                except:
                    pass
                if nmax == 0:
                    results['ac_current_abs'] = 0
                else:
                    try:
                        results['ac_current_abs'] = np.abs(self.kondo.gammaL[nmax-1, nmax])
                        results['ac_current_phase'] = np.angle(self.kondo.gammaL[nmax-1, nmax])
                    except:
                        pass
            elif self.kondo.compact:
                results['dc_current'] = 0
                if nmax % 2:
                    try:
                        results['gamma'] = self.kondo.gamma.submatrix11[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['dc_conductance'] = self.kondo.deltaGammaL.submatrix11[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['ac_current_abs'] = np.abs(self.kondo.gammaL.submatrix01[nmax//2, nmax//2])
                        results['ac_current_phase'] = np.angle(self.kondo.gammaL.submatrix01[nmax//2, nmax//2])
                    except:
                        pass
                else:
                    try:
                        results['gamma'] = self.kondo.gamma.submatrix00[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['dc_conductance'] = self.kondo.deltaGammaL.submatrix00[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['ac_current_abs'] = np.abs(self.kondo.gammaL.submatrix10[nmax//2-1, nmax//2])
                        results['ac_current_phase'] = np.angle(self.kondo.gammaL.submatrix10[nmax//2-1, nmax//2])
                    except:
                        pass
            return results
    
        @property
        def fourier_coef(self):
            return self.kondo.fourier_coef
    
        def data(self, include='all'):
            """
            dictionary of Floquet matrices as numpy arrays.
    
            Argument include takes the following values:
            "all":      save all data (Floquet matrices including voltage shifts)
            "reduced":  exclude voltage shifts and yL
            "observables: save only gamma, gammaL, deltaGammaL, excluding voltage
                        shifts
            "minimal":  save only central column of Floquet matrices for gamma,
                        gammaL, deltaGammaL, excluding voltage
            """
            if include == 'all':
                save = dict(
                        gamma = self.kondo.gamma.values,
                        z = self.kondo.z.values,
                        gammaL = self.kondo.gammaL.values,
                        deltaGammaL = self.kondo.deltaGammaL.values,
                        deltaGamma = self.kondo.deltaGamma.values,
                        yL = self.kondo.yL.values,
                        g2 = self.kondo.g2.to_numpy_array(),
                        g3 = self.kondo.g3.to_numpy_array(),
                        current = self.kondo.current.to_numpy_array(),
                        )
                if self.kondo.include_Ga:
                    save["ga"] = self.kondo.ga.to_numpy_array()
            elif include == 'reduced':
                if self.kondo.voltage_branches:
                    vb = self.kondo.voltage_branches
                    save = dict(
                            gamma = self.kondo.gamma[vb],
                            z = self.kondo.z[vb],
                            gammaL = self.kondo.gammaL.values,
                            deltaGammaL = self.kondo.deltaGammaL.values,
                            deltaGamma = self.kondo.deltaGamma[min(vb,1)],
                            g2 = self.kondo.g2.to_numpy_array()[:,:,vb],
                            g3 = self.kondo.g3.to_numpy_array()[:,:,vb],
                            current = self.kondo.current.to_numpy_array(),
                            )
                    if self.kondo.include_Ga:
                        save["ga"] = self.kondo.ga.to_numpy_array()[:,:,vb]
                else:
                    save = dict(
                            gamma = self.kondo.gamma.values,
                            z = self.kondo.z.values,
                            gammaL = self.kondo.gammaL.values,
                            deltaGammaL = self.kondo.deltaGammaL.values,
                            deltaGamma = self.kondo.deltaGamma.values,
                            g2 = self.kondo.g2.to_numpy_array(),
                            g3 = self.kondo.g3.to_numpy_array(),
                            current = self.kondo.current.to_numpy_array(),
                            )
                    if self.kondo.include_Ga:
                        save["ga"] = self.kondo.ga.to_numpy_array()
            elif include == 'observables':
                if self.kondo.voltage_branches:
                    vb = self.kondo.voltage_branches
                    save = dict(
                            gamma = self.kondo.gamma[vb],
                            gammaL = self.kondo.gammaL.values,
                            deltaGammaL = self.kondo.deltaGammaL.values,
                            )
                else:
                    save = dict(
                            gamma = self.kondo.gamma.values,
                            gammaL = self.kondo.gammaL.values,
                            deltaGammaL = self.kondo.deltaGammaL.values,
                            )
            elif include == 'minimal':
                nmax = self.kondo.nmax
                if self.kondo.voltage_branches:
                    vb = self.kondo.voltage_branches
                    save = dict(
                            gamma = self.kondo.gamma[vb,:,nmax],
                            gammaL = self.kondo.gammaL[:,nmax],
                            deltaGammaL = self.kondo.deltaGammaL[:,nmax],
                            )
                else:
                    save = dict(
                            gamma = self.kondo.gamma[:,nmax],
                            gammaL = self.kondo.gammaL[:,nmax],
                            deltaGammaL = self.kondo.deltaGammaL[:,nmax],
                            )
            else:
                raise ValueError("Unknown value for include: " + include)
            return save
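
        # Usage sketch: KondoExport(kondo).data(include="minimal") returns only
        # the central Floquet-matrix columns of gamma, gammaL and deltaGammaL.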
    
        def save_npz(self, filename, include="all"):
            """
            Save data in binary numpy format.
            """
            np.savez(filename, **self.metadata, **self.data(include))
    
        def save_h5(self, filename, include='all', overwrite=False):
            """
            Save data in HDF5 file.
    
            Returns absolute path to filename where data have been saved.
            If overwrite is False and a file would be overwritten, append a random
            string to the end of the filename.
            """
            while True:
                try:
                    pathlib.Path(filename + '.lock').touch(exist_ok=False)
                    break
                except FileExistsError:
                    try:
                        settings.logger.warning('File %s is locked, waiting ~0.5s'%filename)
                        sleep(0.4 + 0.2*random.random())
                    except KeyboardInterrupt:
                        answer = input('Ignore lock file? Then type "yes": ')
                        if answer.lower() == "yes":
                            break
                        answer = input('Save with filename extended by random string? (Yn): ')
                        if not answer.lower().startswith("n"):
                            return self.save_h5(filename.removesuffix(".h5") + random_string(8) + ".h5", include, overwrite)
            try:
                file_exists = os.path.exists(filename)
                h5file = None
                while h5file is None:
                    try:
                        h5file = tb.open_file(filename, "a", MAX_NUMEXPR_THREADS=1, MAX_BLOSC_THREADS=1)
                    except tb.exceptions.HDF5ExtError:
                        settings.logger.warning('Error opening file %s, waiting 0.5s'%filename)
                        sleep(0.5)
                try:
                    if file_exists:
                        try:
                            h5file.is_visible_node('/data/' + self.hash)
                            new_filename = filename.removesuffix(".h5") + random_string(8) + ".h5"
                            settings.logger.warning("Hash exists in file %s! Saving to %s"%(filename, new_filename))
                            return self.save_h5(new_filename, include, overwrite)
                        except tb.exceptions.NoSuchNodeError:
                            pass
                        metadata_table = h5file.get_node("/metadata/mdtable")
                    else:
                        # create new file
                        metadata_parent = h5file.create_group(h5file.root, "metadata", "Metadata")
                        metadata_table = h5file.create_table(metadata_parent,
                                'mdtable',
                                dict(
                                    idnum = tb.Int32Col(),
                                    hash = tb.StringCol(40),
                                    omega = tb.Float64Col(),
                                    energy = tb.ComplexCol(16),
                                    version_major = tb.Int16Col(),
                                    version_minor = tb.Int16Col(),
                                    git_commit_count = tb.Int16Col(),
                                    git_commit_id = tb.Int32Col(),
                                    timestamp = tb.Time64Col(),
                                    method = tb.EnumCol(KondoExport.METHOD_ENUM, 'unknown', 'int8'),
                                    solver_method = tb.EnumCol(KondoExport.SOLVER_METHOD_ENUM, 'unknown', 'int8'),
                                    solver_tol_abs = tb.Float64Col(),
                                    solver_tol_rel = tb.Float64Col(),
                                    integral_method = tb.Int16Col(),
                                    d = tb.Float64Col(),
                                    vdc = tb.Float64Col(),
                                    vac = tb.Float64Col(),
                                    xL = tb.Float64Col(),
                                    nmax = tb.Int16Col(),
                                    padding = tb.Int16Col(),
                                    voltage_branches = tb.Int16Col(),
                                    resonant_dc_shift = tb.Int16Col(),
                                    solver_flags = tb.Int16Col(),
                                    lazy_inverse_factor = tb.Float64Col(),
                                )
                            )
                        h5file.create_group(h5file.root, "data", "Floquet matrices")
                        h5file.flush()
    
                    # Save metadata
                    row = metadata_table.row
                    idnum = metadata_table.shape[0]
                    row['idnum'] = idnum
                    metadata = self.metadata
                    if include != "all":
                        # self.metadata builds a fresh dict on every access, so
                        # the "reduced" flag must be set on the local copy that
                        # is actually written to the table below.
                        metadata["solver_flags"] |= DataManager.SOLVER_FLAGS["reduced"]
                    row['method'] = KondoExport.METHOD_ENUM[metadata.pop('method')]
                    row['solver_method'] = KondoExport.SOLVER_METHOD_ENUM[metadata.pop('solver_method')]
                    for key, value in metadata.items():
                        try:
                            row[key] = value
                        except KeyError:
                            pass
                    row.append()
    
                    # save data
                    datagroup = h5file.create_group("/data/", self.hash)
                    data = self.data(include)
                    for key, value in data.items():
                        h5file.create_array(datagroup, key, value)
                    if self.fourier_coef is not None:
                        h5file.create_array(datagroup, "fourier_coef", np.array(self.fourier_coef))
                    h5file.flush()
                finally:
                    h5file.close()
            finally:
                os.remove(filename + ".lock")
            return os.path.abspath(filename)
    
    
    class KondoImport:
        """
        Class for importing Kondo objects that were saved with KondoExport.
        Example usage:
        >>> kondo, = KondoImport.read_from_h5("data/frtrg-01.h5", "94f81d2b49df15912798d95cae8e108d75c637c2")
        >>> print(kondo.gammaL[kondo.nmax, kondo.nmax])
        """
        def __init__(self, metadata, datanode, h5file, owns_h5file=False):
            self.metadata = metadata
            self._datanode = datanode
            self._h5file = h5file
            self._owns_h5file = owns_h5file
    
        def __del__(self):
            if self._owns_h5file:
                settings.logger.info("closing h5file")
                self._h5file.close()
    
        @classmethod
        def read_from_h5(cls, filename, khash):
            h5file = tb.open_file(filename, "r")
            datanode = h5file.get_node('/data/' + khash)
            metadatatable = h5file.get_node('/metadata/mdtable')
            counter = 0
            for row in metadatatable.where(f"hash == '{khash}'"):
                metadata = {key:row[key] for key in metadatatable.colnames}
                metadata.pop("idnum", None)
                metadata["hash"] = row["hash"].decode()
                metadata["method"] = KondoExport.METHOD_ENUM(row["method"])
                metadata["solver_method"] = KondoExport.SOLVER_METHOD_ENUM(row["solver_method"])
                item = cls(metadata, datanode, h5file)
                item._rawmetadata = row
                yield item
                counter += 1
            if counter == 1:
                item._owns_h5file = True
            else:
                settings.logger.warning("h5file will not be closed automatically")
    
        @classmethod
        def read_all_from_h5(cls, filename):
            h5file = tb.open_file(filename)
            metadatatable = h5file.get_node('/metadata/mdtable')
            counter = 0
            for row in metadatatable:
                metadata = {key:row[key] for key in metadatatable.colnames}
                metadata.pop("idnum", None)
                metadata["hash"] = row["hash"].decode()
                metadata["method"] = KondoExport.METHOD_ENUM(row["method"])
                metadata["solver_method"] = KondoExport.SOLVER_METHOD_ENUM(row["solver_method"])
                datanode = h5file.get_node("/data/" + metadata["hash"])
                item = cls(metadata, datanode, h5file)
                item._rawmetadata = row
                yield item
                counter += 1
            if counter == 1:
                item._owns_h5file = True
            else:
                settings.logger.warning("h5file will not be closed automatically")
    
        @property
        def main_results(self):
            """
            dictionary of main results: DC current, DC conductance, AC current (absolute value and phase)
            """
            results = dict(
                    gamma = np.nan,
                    dc_current = np.nan,
                    dc_conductance = np.nan,
                    ac_current_abs = np.nan,
                    ac_current_phase = np.nan
                    )
            nmax = self.nmax
            if self.method in ('unknown', 'mu', 'J', 'mu-reference', 'J-reference', 'mu-extrap-voltage', 'J-extrap-voltage'):
                voltage_branches = self.voltage_branches
                try:
                    results['dc_current'] = self.gammaL[nmax, nmax].real
                except:
                    pass
                try:
                    results['dc_conductance'] = self.deltaGammaL[nmax, nmax].real
                except:
                    pass
                try:
                    gamma = self._datanode["gamma"].read()
                    if gamma.ndim == 3:
                        results['gamma'] = gamma[voltage_branches, nmax, nmax].real
                    elif gamma.ndim == 2:
                        results['gamma'] = gamma[nmax, nmax].real
                    else:
                        results['gamma'] = gamma[nmax].real
                except:
                    pass
                if nmax == 0:
                    results['ac_current_abs'] = 0
                else:
                    try:
                        results['ac_current_abs'] = np.abs(self.gammaL[nmax-1, nmax])
                        results['ac_current_phase'] = np.angle(self.gammaL[nmax-1, nmax])
                    except:
                        pass
            elif self.method in ("J-compact-1", "J-compact-2"):
                results['dc_current'] = 0
                if nmax % 2:
                    try:
                        results['gamma'] = self.gamma.submatrix11[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['dc_conductance'] = self.deltaGammaL.submatrix11[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['ac_current_abs'] = np.abs(self.gammaL.submatrix01[nmax//2, nmax//2])
                        results['ac_current_phase'] = np.angle(self.gammaL.submatrix01[nmax//2, nmax//2])
                    except:
                        pass
                else:
                    try:
                        results['gamma'] = self.gamma.submatrix00[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['dc_conductance'] = self.deltaGammaL.submatrix00[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['ac_current_abs'] = np.abs(self.gammaL.submatrix10[nmax//2-1, nmax//2])
                        results['ac_current_phase'] = np.angle(self.gammaL.submatrix10[nmax//2-1, nmax//2])
                    except:
                        pass
            return results
    
        @property
        def fourier_coef(self):
            if "fouier_coef" in self._datanode:
                return self._datanode.fourier_coef.read()
            return None
    
        def __getitem__(self, name):
            if name in self.metadata:
                return self.metadata[name]
            if name in self._datanode:
                return self._datanode[name].read()
            raise KeyError("Unknown key: %s"%name)
    
        def __getattr__(self, name):
            if name in self.metadata:
                return self.metadata[name]
            if name in self._datanode:
                return self._datanode[name].read()
            raise AttributeError("Unknown attribute name: %s"%name)
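
        # Metadata fields and Floquet matrices are thus accessible both as
        # attributes and as items, e.g. kondo.vdc or kondo["gammaL"]; arrays
        # are read from the HDF5 node on every access.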
    
    
    class DataManager:
        """
        Database structure
        tables:
            datapoints (single data point)
        """
        SOLVER_FLAGS = dict(
                contains_flow = 0x0001,
                reduced = 0x0002,
                deleted = 0x0004,
                simplified_initial_conditions = 0x0008,
                enforce_symmetric = 0x0010,
                check_symmetries = 0x0020,
                ignore_symmetries = 0x0040,
                extrapolate_voltage = 0x0080,
                use_cublas = 0x0100,
                use_reference_implementation = 0x0200,
                second_order_rg_equations = 0x0400,
                solve_integral_exactly = 0x0800,
                include_Ga = 0x1000,
                )
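
        # Flags combine bitwise, e.g. (illustrative):
        #     flags = (DataManager.SOLVER_FLAGS["include_Ga"]
        #              | DataManager.SOLVER_FLAGS["reduced"])
        #     bool(flags & DataManager.SOLVER_FLAGS["reduced"])  # -> True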
    
        def __init__(self):
            self.version = settings.VERSION
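            # settings.DB_CONNECTION_STRING can be any SQLAlchemy database URL,
            # e.g. (illustrative) "sqlite:///kondo.sqlite" or
            # "mysql+pymysql://user:password@host/dbname".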
            self.engine = db.create_engine(settings.DB_CONNECTION_STRING, future=True, echo=False)
    
            self.metadata = db.MetaData()
            try:
                self.table = db.Table('datapoints', self.metadata, autoload_with=self.engine)
            except db.exc.NoSuchTableError:
                with self.engine.begin() as connection:
                    settings.logger.info('Creating database table datapoints')
                    self.table = db.Table(
                            'datapoints',
                            self.metadata,
                            db.Column('id', db.INTEGER(), primary_key=True),
                            db.Column('hash', db.CHAR(40)),
                            db.Column('version_major', db.SMALLINT()),
                            db.Column('version_minor', db.SMALLINT()),
                            db.Column('git_commit_count', db.SMALLINT()),
                            db.Column('git_commit_id', db.INTEGER()),
                            db.Column('timestamp', db.TIMESTAMP()),
                            # keep in sync with KondoExport.METHOD_ENUM
                            db.Column('method', db.Enum('unknown', 'mu', 'J', 'J-compact-1', 'J-compact-2', 'mu-reference', 'J-reference', 'mu-extrap-voltage', 'J-extrap-voltage')),
                            db.Column('solver_method', db.Enum('unknown', 'RK45', 'RK23', 'DOP853', 'Radau', 'BDF', 'LSODA', 'other')),
                            db.Column('solver_tol_abs', db.FLOAT()),
                            db.Column('solver_tol_rel', db.FLOAT()),
                            db.Column('omega', db.FLOAT()),
                            db.Column('d', db.FLOAT()),
                            db.Column('vdc', db.FLOAT()),
                            db.Column('vac', db.FLOAT()),
                            db.Column('xL', db.FLOAT()),
                            db.Column('energy_re', db.FLOAT()),
                            db.Column('energy_im', db.FLOAT()),
                            db.Column('lazy_inverse_factor', db.FLOAT()),
                            db.Column('dc_current', db.FLOAT()),
                            db.Column('ac_current_abs', db.FLOAT()),
                            db.Column('ac_current_phase', db.FLOAT()),
                            db.Column('dc_conductance', db.FLOAT()),
                            db.Column('gamma', db.FLOAT()),
                            db.Column('nmax', db.SMALLINT()),
                            db.Column('padding', db.SMALLINT()),
                            db.Column('voltage_branches', db.SMALLINT()),
                            db.Column('resonant_dc_shift', db.SMALLINT()),
                            db.Column('solver_flags', db.SMALLINT()),
                            db.Column('integral_method', db.SMALLINT()),
                            db.Column('dirname', db.String(256)),
                            db.Column('basename', db.String(128)),
                            db.Column('fourier_coef_id', db.INTEGER(), default=-1),
                        )
                    self.table.create(bind=connection)
            try:
                self.fourier_coef_table = db.Table('fourier_coef', self.metadata, autoload_with=self.engine)
            except db.exc.NoSuchTableError:
                with self.engine.begin() as connection:
                    settings.logger.info('Creating database table fourier_coef')
                    self.fourier_coef_table = db.Table(
                            'fourier_coef',
                            self.metadata,
                            db.Column('id', db.INTEGER(), primary_key=True),
                            db.Column('fc00', db.FLOAT(), default=0.),
                            db.Column('fc01', db.FLOAT(), default=0.),
                            db.Column('fc02', db.FLOAT(), default=0.),
                            db.Column('fc03', db.FLOAT(), default=0.),
                            db.Column('fc04', db.FLOAT(), default=0.),
                            db.Column('fc05', db.FLOAT(), default=0.),
                            db.Column('fc06', db.FLOAT(), default=0.),
                            db.Column('fc07', db.FLOAT(), default=0.),
                            db.Column('fc08', db.FLOAT(), default=0.),
                            db.Column('fc09', db.FLOAT(), default=0.),
                            db.Column('fc10', db.FLOAT(), default=0.),
                            db.Column('fc11', db.FLOAT(), default=0.),
                        )
                    self.fourier_coef_table.create(bind=connection)
    
        def insert_from_h5file(self, filename):
            """
            Scan data in HDF5 file and insert datasets in database if they are not
            included yet.
            """
            filename = os.path.abspath(filename)
            dirname = os.path.dirname(filename)
            basename = os.path.basename(filename)
            datasets = []
            skipped = 0
            for dataset in KondoImport.read_all_from_h5(filename):
                settings.logger.debug("Checking hash=" + dataset.hash)
                candidates = self.df_table.loc[self.df_table.hash == dataset.hash]
                if candidates.shape[0] > 0:
                    settings.logger.debug("Found %d times the same hash"%candidates.shape[0])
                    exists = False
                    for idx, candidate in candidates.iterrows():
                        if os.path.join(candidate.dirname, candidate.basename) == filename:
                            exists = True
                            break
                    if exists:
                        settings.logger.debug("Entry exists, skipping")
                        skipped += 1
                        continue
                    else:
                        settings.logger.debug("File seems new, continuing anyway")
                metadata = dataset.metadata.copy()
                energy = metadata.pop('energy')
                metadata.update(
                            energy_re = energy.real,
                            energy_im = energy.imag,
                            timestamp = datetime.fromtimestamp(metadata.pop("timestamp")).isoformat().replace('T', ' '),
                            dirname = dirname,
                            basename = basename,
                            fourier_coef_id = -1,
                        )
                metadata.update(dataset.main_results)
                fourier_coef = dataset.fourier_coef
                if fourier_coef is not None:
                    stmt = self.fourier_coef_table.insert().values(
                            {'fc%02d'%i: f for i, f in enumerate(fourier_coef[:12])})
                    with self.engine.begin() as connection:
                        # engine.begin() commits automatically on exit
                        result = connection.execute(stmt)
                    # inserted_primary_key is a tuple-like row; store the scalar id
                    metadata.update(fourier_coef_id = result.inserted_primary_key[0])
                datasets.append(metadata)
            try:
                if not dataset._owns_h5file:
                    dataset._h5file.close()
                    settings.logger.info("Closed HDF5 file")
            except:
                pass
            settings.logger.info("Inserting %d new entries, ignoring %d"%(len(datasets), skipped))
            new_frame = pd.DataFrame(datasets)
            new_frame.to_sql(
                    'datapoints',
                    self.engine,
                    if_exists = 'append',
                    index = False,
                    )
            del self.df_table
    
        def insert_in_db(self, filename : str, kondo : KondoExport):
            """
            Save metadata in database for data stored in filename.
            """
            metadata = kondo.metadata
            metadata.update(kondo.main_results)
            energy = metadata.pop('energy')
            metadata.update(
                        energy_re = energy.real,
                        energy_im = energy.imag,
                        timestamp = datetime.fromtimestamp(metadata.pop("timestamp")).isoformat().replace('T', ' '),
                        dirname = os.path.dirname(filename),
                        basename = os.path.basename(filename),
                    )
            fourier_coef = kondo.fourier_coef
            if fourier_coef is not None:
                stmt = self.fourier_coef_table.insert().values(
                        {'fc%02d'%i: f for i, f in enumerate(fourier_coef[:12])})
                with self.engine.begin() as connection:
                    # engine.begin() commits automatically on exit
                    result = connection.execute(stmt)
                # inserted_primary_key is a tuple-like row; store the scalar id
                metadata.update(fourier_coef_id = result.inserted_primary_key[0])
            frame = pd.DataFrame(metadata, index=[0])
            frame.to_sql(
                    'datapoints',
                    self.engine,
                    if_exists='append',
                    index=False,
                    )
            try:
                del self.df_table
            except AttributeError:
                pass
    
        def save_h5(self, kondo : KondoExport, filename : str = None, include='all', overwrite=False):
            """
            Save all data in given filename and keep metadata in database.
            """
            if filename is None:
                filename = os.path.join(settings.BASEPATH, settings.FILENAME)
            if not isinstance(kondo, KondoExport):
                kondo = KondoExport(kondo)
            filename = kondo.save_h5(filename, include, overwrite)
            self.insert_in_db(filename, kondo)
    
        def cache_df_table(self, min_version=(0,5,-1)):
            settings.logger.debug('DataManager: cache df_table')
            with self.engine.begin() as connection:
                df_table = pd.read_sql_table('datapoints', connection, index_col='id')
            selection = (df_table.solver_flags & DataManager.SOLVER_FLAGS['deleted']) == 0
            selection &= (df_table.version_major > min_version[0]) | ( (df_table.version_major == min_version[0]) & (df_table.version_minor >= min_version[1]) )
            selection &= df_table.energy_re == 0
            selection &= df_table.energy_im == 0
            if len(min_version) > 2 and min_version[2] > 0:
                selection &= df_table.git_commit_count >= min_version[2]
            self.df_table = df_table[selection]
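
        # min_version = (major, minor, commit_count): rows from versions older
        # than major.minor are dropped; a commit_count <= 0 disables the third
        # check.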
    
        def __getattr__(self, name):
            if name == 'df_table':
                self.cache_df_table()
                return self.df_table
            raise AttributeError("Unknown attribute name: %s"%name)
    
        def list(self, min_version=(14,0,-1,-1), **parameters):
            """
        Return a DataFrame of data points selected by physical parameters.
            """
            selection = (self.df_table.version_major > min_version[0]) | (self.df_table.version_major == min_version[0]) & (self.df_table.version_minor >= min_version[1])
            selection &= self.df_table.energy_re == 0
            selection &= self.df_table.energy_im == 0
            include_Ga = parameters.pop("include_Ga", None)
            if include_Ga:
                selection &= (self.df_table.solver_flags & DataManager.SOLVER_FLAGS["include_Ga"]) != 0
            elif include_Ga == False:
                selection &= (~self.df_table.solver_flags & DataManager.SOLVER_FLAGS["include_Ga"]) != 0
            truncation_order = parameters.pop("truncation_order", None)
            if truncation_order == 2:
                selection &= (self.df_table.solver_flags & DataManager.SOLVER_FLAGS["second_order_rg_equations"]) != 0
            elif truncation_order == 3:
                selection &= (self.df_table.solver_flags & DataManager.SOLVER_FLAGS["second_order_rg_equations"]) == 0
            if len(min_version) > 2 and min_version[2] > 0:
                selection &= self.df_table.git_commit_count >= min_version[2]
            method = parameters.pop("method", None)
            if method == "J":
                selection &= self.df_table.method != "mu"
            elif method is not None:
                selection &= self.df_table.method == method
            for key, value in parameters.items():
                if value is None:
                    continue
                try:
                    selection &= np.isclose(self.df_table[key], value, rtol=1e-6, atol=1e-15)
                except TypeError:
                    try:
                        selection &= self.df_table[key] == value
                    except KeyError:
                        settings.logger.warning("Unknown key: %s"%key)
            return self.df_table.loc[selection]
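
        # Usage sketch (parameter values illustrative; float parameters are
        # matched with np.isclose):
        #     dm = DataManager()
        #     df = dm.list(omega=10.0, vdc=0.0)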
    
        def load_all(self, **parameters):
            table = self.list(**parameters)
            for ((dirname, basename), subtable) in table.groupby(["dirname", "basename"]):
                try:
                    h5file = tb.open_file(os.path.join(dirname, basename))
                except:
                    settings.logger.exception("Error while loading HDF5 file")
                    continue
                #metadatatable = h5file.get_node('/metadata/mdtable')
                for index, row in subtable.iterrows():
                    try:
                        datanode = h5file.get_node("/data/" + row.hash)
                        #metadatarow = metadatatable.where("hash == '%s'"%(row.hash))
                        yield KondoImport(row, datanode, h5file, owns_h5file=subtable.shape[0]==1)
                    except:
                        settings.logger.exception("Error while loading data")
    
    def list_data(**kwargs):
        table = DataManager().list(**kwargs)
        print(table[['method', 'vdc', 'vac', 'omega', 'nmax', 'voltage_branches', 'padding', 'dc_current', 'dc_conductance', 'gamma', 'ac_current_abs']])