data_management.py
    # Copyright 2022 Valentin Bruch <valentin.bruch@rwth-aachen.de>
    # License: MIT
    """
    Kondo FRTRG, data management module
    
    This file contains functions and classes to manage data generated using the
    kondo module.
    
    General concepts:
    * All metadata are stored in an SQL database.
    * Floquet matrices are stored in HDF5 files.
    * Each HDF5 file can contain multiple data points. Data points can be added to
      HDF5 files.
    * Each HDF5 file contains a table of metadata for the data points stored in
      this file.
    * Data points are identified in HDF5 files by a hash generated from their full
      Floquet matrices at the end of the RG flow.
    * The SQL database stores the directory, filename, and hash where the Floquet
      matrices are stored.
    
    Implementation:
    * pandas for accessing the SQL database and managing the full table of metadata
    * pytables for HDF5 files
    * a file "filename.lock" is temporarily created when writing to a HDF5 file.
    """
    
    import os
    import tables as tb
    import pathlib
    from time import sleep
    from datetime import datetime
    import numpy as np
    import pandas as pd
    import sqlalchemy as db
    import random
    import warnings
    import settings
    
    # We use hashes as identifiers for data points in HDF5 files. These hashes
    # are often not valid python names, which causes a warning. We ignore this
    # warning.
    warnings.simplefilter("ignore", tb.NaturalNameWarning)
    
    def random_string(length: int):
        """
        Generate a random string of alphanumeric characters with the given length.
        """
        res = ""
        for _ in range(length):
            x = random.randint(0, 61)
            if x < 10:
                res += chr(x + 48)
            elif x < 36:
                res += chr(x + 55)
            else:
                res += chr(x + 61)
        return res
    
    
    def replace_all(string: str, replacements: dict):
        """
        Apply all replacements to a string.
        """
        for old, new in replacements.items():
            string = string.replace(old, new)
        return string
    
    
    class KondoExport:
        """
        Class for saving Kondo object to file.
        Example usage:
        >>> kondo = Kondo(...)
        >>> kondo.run(...)
        >>> KondoExport(kondo).save_h5("data/frtrg-01.h5")
        """
        METHOD_ENUM = tb.Enum(('unknown', 'mu', 'J', 'J-compact-1', 'J-compact-2', 'mu-reference', 'J-reference', 'mu-extrap-voltage', 'J-extrap-voltage'))
        SOLVER_METHOD_ENUM = tb.Enum(('unknown', 'RK45', 'RK23', 'DOP853', 'Radau', 'BDF', 'LSODA', 'other'))
    
        def __init__(self, kondo):
            self.kondo = kondo
    
        @property
        def hash(self):
            """
            hash based on Floquet matrices in Kondo object
            """
            try:
                return self._hash
            except AttributeError:
                self._hash = self.kondo.hash()[:40]
                return self._hash
    
        @property
        def metadata(self):
            """
            dictionary of metadata
            """
            # Determine method
            if self.kondo.unitary_transformation:
                if self.kondo.compact == 2:
                    method = 'J-compact-2'
                elif self.kondo.compact == 1:
                    method = 'J-compact-1'
                else:
                    method = 'J'
            else:
                method = 'mu'
    
            # Collect solver flags
            solver_flags = 0
            try:
                if self.kondo.simplified_initial_conditions:
                    solver_flags |= DataManager.SOLVER_FLAGS["simplified_initial_conditions"]
            except AttributeError:
                pass
            try:
                if self.kondo.include_Ga:
                    solver_flags |= DataManager.SOLVER_FLAGS["include_Ga"]
            except AttributeError:
                pass
            try:
                if self.kondo.solve_integral_exactly:
                    solver_flags |= DataManager.SOLVER_FLAGS["solve_integral_exactly"]
            except AttributeError:
                pass
            try:
                if self.kondo.truncation_order == 2:
                    solver_flags |= DataManager.SOLVER_FLAGS["second_order_rg_equations"]
                elif self.kondo.truncation_order != 3:
                    settings.logger.warning("Invalid truncation order: %s"%self.kondo.truncation_order)
            except AttributeError:
                pass
            for (key, value) in self.kondo.global_settings.items():
                if value:
                    try:
                        solver_flags |= DataManager.SOLVER_FLAGS[key.lower()]
                    except KeyError:
                        pass
    
            version = self.kondo.global_settings["VERSION"]
            return dict(
                    hash = self.hash,
                    omega = self.kondo.omega,
                    energy = self.kondo.energy,
                    version_major = version[0],
                    version_minor = version[1],
                    lazy_inverse_factor = self.kondo.global_settings["LAZY_INVERSE_FACTOR"],
                    git_commit_count = version[2],
                    git_commit_id = version[3],
                    method = method,
                    timestamp = datetime.utcnow().timestamp(),
                    solver_method = getattr(self.kondo, 'solveopts', {}).get('method', 'unknown'),
                    solver_tol_abs = getattr(self.kondo, 'solveopts', {}).get('atol', -1),
                    solver_tol_rel = getattr(self.kondo, 'solveopts', {}).get('rtol', -1),
                    integral_method = getattr(self.kondo, "integral_method", -15),
                    d = self.kondo.d,
                    vdc = self.kondo.vdc,
                    vac = self.kondo.vac,
                    xL = self.kondo.xL,
                    nmax = self.kondo.nmax,
                    padding = self.kondo.padding,
                    voltage_branches = self.kondo.voltage_branches,
                    resonant_dc_shift = self.kondo.resonant_dc_shift,
                    solver_flags = solver_flags,
                    )
    
        @property
        def main_results(self):
            """
            dictionary of main results: DC current, DC conductance, AC current (absolute value and phase)
            """
            results = dict(
                    gamma = np.nan,
                    dc_current = np.nan,
                    dc_conductance = np.nan,
                    ac_current_abs = np.nan,
                    ac_current_phase = np.nan
                    )
            nmax = self.kondo.nmax
            vb = self.kondo.voltage_branches
            if self.kondo.compact == 0:
                try:
                    results['gamma'] = self.kondo.gamma[vb, nmax, nmax].real
                except:
                    pass
                try:
                    results['dc_current'] = self.kondo.gammaL[nmax, nmax].real
                except:
                    pass
                try:
                    results['dc_conductance'] = self.kondo.deltaGammaL[nmax, nmax].real
                except:
                    pass
                if nmax == 0:
                    results['ac_current_abs'] = 0
                else:
                    try:
                        results['ac_current_abs'] = np.abs(self.kondo.gammaL[nmax-1, nmax])
                        results['ac_current_phase'] = np.angle(self.kondo.gammaL[nmax-1, nmax])
                    except:
                        pass
            elif self.kondo.compact:
                results['dc_current'] = 0
                if nmax % 2:
                    try:
                        results['gamma'] = self.kondo.gamma.submatrix11[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['dc_conductance'] = self.kondo.deltaGammaL.submatrix11[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['ac_current_abs'] = np.abs(self.kondo.gammaL.submatrix01[nmax//2, nmax//2])
                        results['ac_current_phase'] = np.angle(self.kondo.gammaL.submatrix01[nmax//2, nmax//2])
                    except:
                        pass
                else:
                    try:
                        results['gamma'] = self.kondo.gamma.submatrix00[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['dc_conductance'] = self.kondo.deltaGammaL.submatrix00[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['ac_current_abs'] = np.abs(self.kondo.gammaL.submatrix10[nmax//2-1, nmax//2])
                        results['ac_current_phase'] = np.angle(self.kondo.gammaL.submatrix10[nmax//2-1, nmax//2])
                    except:
                        pass
            return results
    
        @property
        def fourier_coef(self):
            return self.kondo.fourier_coef
    
        def data(self, include='all'):
            """
            dictionary of Floquet matrices as numpy arrays.
    
            Argument include takes the following values:
            "all":      save all data (Floquet matrices including voltage shifts)
            "reduced":  exclude voltage shifts and yL
            "observables: save only gamma, gammaL, deltaGammaL, excluding voltage
                        shifts
            "minimal":  save only central column of Floquet matrices for gamma,
                        gammaL, deltaGammaL, excluding voltage
            """
            if include == 'all':
                save = dict(
                        gamma = self.kondo.gamma.values,
                        z = self.kondo.z.values,
                        gammaL = self.kondo.gammaL.values,
                        deltaGammaL = self.kondo.deltaGammaL.values,
                        deltaGamma = self.kondo.deltaGamma.values,
                        yL = self.kondo.yL.values,
                        g2 = self.kondo.g2.to_numpy_array(),
                        g3 = self.kondo.g3.to_numpy_array(),
                        current = self.kondo.current.to_numpy_array(),
                        )
                if self.kondo.include_Ga:
                    save["ga"] = self.kondo.ga.to_numpy_array()
            elif include == 'reduced':
                if self.kondo.voltage_branches:
                    vb = self.kondo.voltage_branches
                    save = dict(
                            gamma = self.kondo.gamma[vb],
                            z = self.kondo.z[vb],
                            gammaL = self.kondo.gammaL.values,
                            deltaGammaL = self.kondo.deltaGammaL.values,
                            deltaGamma = self.kondo.deltaGamma[min(vb,1)],
                            g2 = self.kondo.g2.to_numpy_array()[:,:,vb],
                            g3 = self.kondo.g3.to_numpy_array()[:,:,vb],
                            current = self.kondo.current.to_numpy_array(),
                            )
                    if self.kondo.include_Ga:
                        save["ga"] = self.kondo.ga.to_numpy_array()[:,:,vb]
                else:
                    save = dict(
                            gamma = self.kondo.gamma.values,
                            z = self.kondo.z.values,
                            gammaL = self.kondo.gammaL.values,
                            deltaGammaL = self.kondo.deltaGammaL.values,
                            deltaGamma = self.kondo.deltaGamma.values,
                            g2 = self.kondo.g2.to_numpy_array(),
                            g3 = self.kondo.g3.to_numpy_array(),
                            current = self.kondo.current.to_numpy_array(),
                            )
                    if self.kondo.include_Ga:
                        save["ga"] = self.kondo.ga.to_numpy_array()
            elif include == 'observables':
                if self.kondo.voltage_branches:
                    vb = self.kondo.voltage_branches
                    save = dict(
                            gamma = self.kondo.gamma[vb],
                            gammaL = self.kondo.gammaL.values,
                            deltaGammaL = self.kondo.deltaGammaL.values,
                            )
                else:
                    save = dict(
                            gamma = self.kondo.gamma.values,
                            gammaL = self.kondo.gammaL.values,
                            deltaGammaL = self.kondo.deltaGammaL.values,
                            )
            elif include == 'minimal':
                nmax = self.kondo.nmax
                if self.kondo.voltage_branches:
                    vb = self.kondo.voltage_branches
                    save = dict(
                            gamma = self.kondo.gamma[vb,:,nmax],
                            gammaL = self.kondo.gammaL[:,nmax],
                            deltaGammaL = self.kondo.deltaGammaL[:,nmax],
                            )
                else:
                    save = dict(
                            gamma = self.kondo.gamma[:,nmax],
                            gammaL = self.kondo.gammaL[:,nmax],
                            deltaGammaL = self.kondo.deltaGammaL[:,nmax],
                            )
            else:
                raise ValueError("Unknown value for include: " + include)
            return save
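
        # Usage sketch: KondoExport(kondo).data(include="minimal") returns only
        # the central Floquet-matrix columns of gamma, gammaL and deltaGammaL.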
    
        def save_npz(self, filename, include="all"):
            """
            Save data in binary numpy format.
            """
            np.savez(filename, **self.metadata, **self.data(include))
    
        def save_h5(self, filename, include='all', overwrite=False):
            """
            Save data in HDF5 file.
    
            Returns absolute path to filename where data have been saved.
            If overwrite is False and a file would be overwritten, append a random
            string to the end of the filename.
            """
            while True:
                try:
                    pathlib.Path(filename + '.lock').touch(exist_ok=False)
                    break
                except FileExistsError:
                    try:
                        settings.logger.warning('File %s is locked, waiting ~0.5s'%filename)
                        sleep(0.4 + 0.2*random.random())
                    except KeyboardInterrupt:
                        answer = input('Ignore lock file? Then type "yes": ')
                        if answer.lower() == "yes":
                            break
                        answer = input('Save with filename extended by random string? (Yn): ')
                        if not answer.lower().startswith("n"):
                            return self.save_h5(filename.removesuffix(".h5") + random_string(8) + ".h5", include, overwrite)
            try:
                file_exists = os.path.exists(filename)
                h5file = None
                while h5file is None:
                    try:
                        h5file = tb.open_file(filename, "a", MAX_NUMEXPR_THREADS=1, MAX_BLOSC_THREADS=1)
                    except tb.exceptions.HDF5ExtError:
                        settings.logger.warning('Error opening file %s, waiting 0.5s'%filename)
                        sleep(0.5)
                try:
                    if file_exists:
                        try:
                            h5file.is_visible_node('/data/' + self.hash)
                            new_filename = filename.removesuffix(".h5") + random_string(8) + ".h5"
                            settings.logger.warning("Hash exists in file %s! Saving to %s"%(filename, new_filename))
                            return self.save_h5(new_filename, include, overwrite)
                        except tb.exceptions.NoSuchNodeError:
                            pass
                        metadata_table = h5file.get_node("/metadata/mdtable")
                    else:
                        # create new file
                        metadata_parent = h5file.create_group(h5file.root, "metadata", "Metadata")
                        metadata_table = h5file.create_table(metadata_parent,
                                'mdtable',
                                dict(
                                    idnum = tb.Int32Col(),
                                    hash = tb.StringCol(40),
                                    omega = tb.Float64Col(),
                                    energy = tb.ComplexCol(16),
                                    version_major = tb.Int16Col(),
                                    version_minor = tb.Int16Col(),
                                    git_commit_count = tb.Int16Col(),
                                    git_commit_id = tb.Int32Col(),
                                    timestamp = tb.Time64Col(),
                                    method = tb.EnumCol(KondoExport.METHOD_ENUM, 'unknown', 'int8'),
                                    solver_method = tb.EnumCol(KondoExport.SOLVER_METHOD_ENUM, 'unknown', 'int8'),
                                    solver_tol_abs = tb.Float64Col(),
                                    solver_tol_rel = tb.Float64Col(),
                                    integral_method = tb.Int16Col(),
                                    d = tb.Float64Col(),
                                    vdc = tb.Float64Col(),
                                    vac = tb.Float64Col(),
                                    xL = tb.Float64Col(),
                                    nmax = tb.Int16Col(),
                                    padding = tb.Int16Col(),
                                    voltage_branches = tb.Int16Col(),
                                    resonant_dc_shift = tb.Int16Col(),
                                    solver_flags = tb.Int16Col(),
                                    lazy_inverse_factor = tb.Float64Col(),
                                )
                            )
                        h5file.create_group(h5file.root, "data", "Floquet matrices")
                        h5file.flush()
    
                    # Save metadata
                    row = metadata_table.row
                    idnum = metadata_table.shape[0]
                    row['idnum'] = idnum
                    metadata = self.metadata
                    if include != "all":
                        # self.metadata builds a fresh dict on every access, so
                        # the "reduced" flag must be set on the local copy that
                        # is actually written to the table below.
                        metadata["solver_flags"] |= DataManager.SOLVER_FLAGS["reduced"]
                    row['method'] = KondoExport.METHOD_ENUM[metadata.pop('method')]
                    row['solver_method'] = KondoExport.SOLVER_METHOD_ENUM[metadata.pop('solver_method')]
                    for key, value in metadata.items():
                        try:
                            row[key] = value
                        except KeyError:
                            pass
                    row.append()
    
                    # save data
                    datagroup = h5file.create_group("/data/", self.hash)
                    data = self.data(include)
                    for key, value in data.items():
                        h5file.create_array(datagroup, key, value)
                    if self.fourier_coef is not None:
                        h5file.create_array(datagroup, "fourier_coef", np.array(self.fourier_coef))
                    h5file.flush()
                finally:
                    h5file.close()
            finally:
                os.remove(filename + ".lock")
            return os.path.abspath(filename)
    
    
    class KondoImport:
        """
        Class for importing Kondo objects that were saved with KondoExport.
        Example usage:
        >>> kondo, = KondoImport.read_from_h5("data/frtrg-01.h5", "94f81d2b49df15912798d95cae8e108d75c637c2")
        >>> print(kondo.gammaL[kondo.nmax, kondo.nmax])
        """
        def __init__(self, metadata, datanode, h5file, owns_h5file=False):
            self.metadata = metadata
            self._datanode = datanode
            self._h5file = h5file
            self._owns_h5file = owns_h5file
    
        def __del__(self):
            if self._owns_h5file:
                settings.logger.info("closing h5file")
                self._h5file.close()
    
        @classmethod
        def read_from_h5(cls, filename, khash):
            h5file = tb.open_file(filename, "r")
            datanode = h5file.get_node('/data/' + khash)
            metadatatable = h5file.get_node('/metadata/mdtable')
            counter = 0
            for row in metadatatable.where(f"hash == '{khash}'"):
                metadata = {key:row[key] for key in metadatatable.colnames}
                metadata.pop("idnum", None)
                metadata["hash"] = row["hash"].decode()
                metadata["method"] = KondoExport.METHOD_ENUM(row["method"])
                metadata["solver_method"] = KondoExport.SOLVER_METHOD_ENUM(row["solver_method"])
                item = cls(metadata, datanode, h5file)
                item._rawmetadata = row
                yield item
                counter += 1
            if counter == 1:
                item._owns_h5file = True
            else:
                settings.logger.warning("h5file will not be closed automatically")
    
        @classmethod
        def read_all_from_h5(cls, filename):
            h5file = tb.open_file(filename)
            metadatatable = h5file.get_node('/metadata/mdtable')
            counter = 0
            for row in metadatatable:
                metadata = {key:row[key] for key in metadatatable.colnames}
                metadata.pop("idnum", None)
                metadata["hash"] = row["hash"].decode()
                metadata["method"] = KondoExport.METHOD_ENUM(row["method"])
                metadata["solver_method"] = KondoExport.SOLVER_METHOD_ENUM(row["solver_method"])
                datanode = h5file.get_node("/data/" + metadata["hash"])
                item = cls(metadata, datanode, h5file)
                item._rawmetadata = row
                yield item
                counter += 1
            if counter == 1:
                item._owns_h5file = True
            else:
                settings.logger.warning("h5file will not be closed automatically")
    
        @property
        def main_results(self):
            """
            dictionary of main results: DC current, DC conductance, AC current (absolute value and phase)
            """
            results = dict(
                    gamma = np.nan,
                    dc_current = np.nan,
                    dc_conductance = np.nan,
                    ac_current_abs = np.nan,
                    ac_current_phase = np.nan
                    )
            nmax = self.nmax
            if self.method in ('unknown', 'mu', 'J', 'mu-reference', 'J-reference', 'mu-extrap-voltage', 'J-extrap-voltage'):
                voltage_branches = self.voltage_branches
                try:
                    results['dc_current'] = self.gammaL[nmax, nmax].real
                except:
                    pass
                try:
                    results['dc_conductance'] = self.deltaGammaL[nmax, nmax].real
                except:
                    pass
                try:
                    gamma = self._datanode["gamma"].read()
                    if gamma.ndim == 3:
                        results['gamma'] = gamma[voltage_branches, nmax, nmax].real
                    elif gamma.ndim == 2:
                        results['gamma'] = gamma[nmax, nmax].real
                    else:
                        results['gamma'] = gamma[nmax].real
                except:
                    pass
                if nmax == 0:
                    results['ac_current_abs'] = 0
                else:
                    try:
                        results['ac_current_abs'] = np.abs(self.gammaL[nmax-1, nmax])
                        results['ac_current_phase'] = np.angle(self.gammaL[nmax-1, nmax])
                    except:
                        pass
            elif self.method in ("J-compact-1", "J-compact-2"):
                results['dc_current'] = 0
                if nmax % 2:
                    try:
                        results['gamma'] = self.gamma.submatrix11[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['dc_conductance'] = self.deltaGammaL.submatrix11[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['ac_current_abs'] = np.abs(self.gammaL.submatrix01[nmax//2, nmax//2])
                        results['ac_current_phase'] = np.angle(self.gammaL.submatrix01[nmax//2, nmax//2])
                    except:
                        pass
                else:
                    try:
                        results['gamma'] = self.gamma.submatrix00[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['dc_conductance'] = self.deltaGammaL.submatrix00[nmax//2, nmax//2].real
                    except:
                        pass
                    try:
                        results['ac_current_abs'] = np.abs(self.gammaL.submatrix10[nmax//2-1, nmax//2])
                        results['ac_current_phase'] = np.angle(self.gammaL.submatrix10[nmax//2-1, nmax//2])
                    except:
                        pass
            return results
    
        @property
        def fourier_coef(self):
            if "fouier_coef" in self._datanode:
                return self._datanode.fourier_coef.read()
            return None
    
        def __getitem__(self, name):
            if name in self.metadata:
                return self.metadata[name]
            if name in self._datanode:
                return self._datanode[name].read()
            raise KeyError("Unknown key: %s"%name)
    
        def __getattr__(self, name):
            if name in self.metadata:
                return self.metadata[name]
            if name in self._datanode:
                return self._datanode[name].read()
            raise AttributeError("Unknown attribute name: %s"%name)
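
        # Metadata fields and Floquet matrices are thus accessible both as
        # attributes and as items, e.g. kondo.vdc or kondo["gammaL"]; arrays
        # are read from the HDF5 node on every access.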
    
    
    class DataManager:
        """
        Database structure
        tables:
            datapoints (single data point)
        """
        SOLVER_FLAGS = dict(
                contains_flow = 0x0001,
                reduced = 0x0002,
                deleted = 0x0004,
                simplified_initial_conditions = 0x0008,
                enforce_symmetric = 0x0010,
                check_symmetries = 0x0020,
                ignore_symmetries = 0x0040,
                extrapolate_voltage = 0x0080,
                use_cublas = 0x0100,
                use_reference_implementation = 0x0200,
                second_order_rg_equations = 0x0400,
                solve_integral_exactly = 0x0800,
                include_Ga = 0x1000,
                )
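
        # Flags combine bitwise, e.g. (illustrative):
        #     flags = (DataManager.SOLVER_FLAGS["include_Ga"]
        #              | DataManager.SOLVER_FLAGS["reduced"])
        #     bool(flags & DataManager.SOLVER_FLAGS["reduced"])  # -> True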
    
        def __init__(self):
            self.version = settings.VERSION
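            # settings.DB_CONNECTION_STRING can be any SQLAlchemy database URL,
            # e.g. (illustrative) "sqlite:///kondo.sqlite" or
            # "mysql+pymysql://user:password@host/dbname".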
            self.engine = db.create_engine(settings.DB_CONNECTION_STRING, future=True, echo=False)
    
            self.metadata = db.MetaData()
            try:
                self.table = db.Table('datapoints', self.metadata, autoload_with=self.engine)
            except db.exc.NoSuchTableError:
                with self.engine.begin() as connection:
                    settings.logger.info('Creating database table datapoints')
                    self.table = db.Table(
                            'datapoints',
                            self.metadata,
                            db.Column('id', db.INTEGER(), primary_key=True),
                            db.Column('hash', db.CHAR(40)),
                            db.Column('version_major', db.SMALLINT()),
                            db.Column('version_minor', db.SMALLINT()),
                            db.Column('git_commit_count', db.SMALLINT()),
                            db.Column('git_commit_id', db.INTEGER()),
                            db.Column('timestamp', db.TIMESTAMP()),
                            # keep in sync with KondoExport.METHOD_ENUM
                            db.Column('method', db.Enum('unknown', 'mu', 'J', 'J-compact-1', 'J-compact-2', 'mu-reference', 'J-reference', 'mu-extrap-voltage', 'J-extrap-voltage')),
                            db.Column('solver_method', db.Enum('unknown', 'RK45', 'RK23', 'DOP853', 'Radau', 'BDF', 'LSODA', 'other')),
                            db.Column('solver_tol_abs', db.FLOAT()),
                            db.Column('solver_tol_rel', db.FLOAT()),
                            db.Column('omega', db.FLOAT()),
                            db.Column('d', db.FLOAT()),
                            db.Column('vdc', db.FLOAT()),
                            db.Column('vac', db.FLOAT()),
                            db.Column('xL', db.FLOAT()),
                            db.Column('energy_re', db.FLOAT()),
                            db.Column('energy_im', db.FLOAT()),
                            db.Column('lazy_inverse_factor', db.FLOAT()),
                            db.Column('dc_current', db.FLOAT()),
                            db.Column('ac_current_abs', db.FLOAT()),
                            db.Column('ac_current_phase', db.FLOAT()),
                            db.Column('dc_conductance', db.FLOAT()),
                            db.Column('gamma', db.FLOAT()),
                            db.Column('nmax', db.SMALLINT()),
                            db.Column('padding', db.SMALLINT()),
                            db.Column('voltage_branches', db.SMALLINT()),
                            db.Column('resonant_dc_shift', db.SMALLINT()),
                            db.Column('solver_flags', db.SMALLINT()),
                            db.Column('integral_method', db.SMALLINT()),
                            db.Column('dirname', db.String(256)),
                            db.Column('basename', db.String(128)),
                            db.Column('fourier_coef_id', db.INTEGER(), default=-1),
                        )
                    self.table.create(bind=connection)
            try:
                self.fourier_coef_table = db.Table('fourier_coef', self.metadata, autoload_with=self.engine)
            except db.exc.NoSuchTableError:
                with self.engine.begin() as connection:
                    settings.logger.info('Creating database table fourier_coef')
                    self.fourier_coef_table = db.Table(
                            'fourier_coef',
                            self.metadata,
                            db.Column('id', db.INTEGER(), primary_key=True),
                            db.Column('fc00', db.FLOAT(), default=0.),
                            db.Column('fc01', db.FLOAT(), default=0.),
                            db.Column('fc02', db.FLOAT(), default=0.),
                            db.Column('fc03', db.FLOAT(), default=0.),
                            db.Column('fc04', db.FLOAT(), default=0.),
                            db.Column('fc05', db.FLOAT(), default=0.),
                            db.Column('fc06', db.FLOAT(), default=0.),
                            db.Column('fc07', db.FLOAT(), default=0.),
                            db.Column('fc08', db.FLOAT(), default=0.),
                            db.Column('fc09', db.FLOAT(), default=0.),
                            db.Column('fc10', db.FLOAT(), default=0.),
                            db.Column('fc11', db.FLOAT(), default=0.),
                        )
                    self.fourier_coef_table.create(bind=connection)
    
        def insert_from_h5file(self, filename):
            """
            Scan data in HDF5 file and insert datasets in database if they are not
            included yet.
            """
            filename = os.path.abspath(filename)
            dirname = os.path.dirname(filename)
            basename = os.path.basename(filename)
            datasets = []
            skipped = 0
            for dataset in KondoImport.read_all_from_h5(filename):
                settings.logger.debug("Checking hash=" + dataset.hash)
                candidates = self.df_table.loc[self.df_table.hash == dataset.hash]
                if candidates.shape[0] > 0:
                    settings.logger.debug("Found %d times the same hash"%candidates.shape[0])
                    exists = False
                    for idx, candidate in candidates.iterrows():
                        if os.path.join(candidate.dirname, candidate.basename) == filename:
                            exists = True
                            break
                    if exists:
                        settings.logger.debug("Entry exists, skipping")
                        skipped += 1
                        continue
                    else:
                        settings.logger.debug("File seems new, continuing anyway")
                metadata = dataset.metadata.copy()
                energy = metadata.pop('energy')
                metadata.update(
                            energy_re = energy.real,
                            energy_im = energy.imag,
                            timestamp = datetime.fromtimestamp(metadata.pop("timestamp")).isoformat().replace('T', ' '),
                            dirname = dirname,
                            basename = basename,
                            fourier_coef_id = -1,
                        )
                metadata.update(dataset.main_results)
                fourier_coef = dataset.fourier_coef
                if fourier_coef is not None:
                    stmt = self.fourier_coef_table.insert().values(
                            {'fc%02d'%i: f for i, f in enumerate(fourier_coef[:12])})
                    with self.engine.begin() as connection:
                        # engine.begin() commits automatically on exit
                        result = connection.execute(stmt)
                    # inserted_primary_key is a tuple-like row; store the scalar id
                    metadata.update(fourier_coef_id = result.inserted_primary_key[0])
                datasets.append(metadata)
            try:
                if not dataset._owns_h5file:
                    dataset._h5file.close()
                    settings.logger.info("Closed HDF5 file")
            except:
                pass
            settings.logger.info("Inserting %d new entries, ignoring %d"%(len(datasets), skipped))
            new_frame = pd.DataFrame(datasets)
            new_frame.to_sql(
                    'datapoints',
                    self.engine,
                    if_exists = 'append',
                    index = False,
                    )
            del self.df_table
    
        def insert_in_db(self, filename : str, kondo : KondoExport):
            """
            Save metadata in database for data stored in filename.
            """
            metadata = kondo.metadata
            metadata.update(kondo.main_results)
            energy = metadata.pop('energy')
            metadata.update(
                        energy_re = energy.real,
                        energy_im = energy.imag,
                        timestamp = datetime.fromtimestamp(metadata.pop("timestamp")).isoformat().replace('T', ' '),
                        dirname = os.path.dirname(filename),
                        basename = os.path.basename(filename),
                    )
            fourier_coef = kondo.fourier_coef
            if fourier_coef is not None:
                stmt = self.fourier_coef_table.insert().values(
                        {'fc%02d'%i: f for i, f in enumerate(fourier_coef[:12])})
                with self.engine.begin() as connection:
                    # engine.begin() commits automatically on exit
                    result = connection.execute(stmt)
                # inserted_primary_key is a tuple-like row; store the scalar id
                metadata.update(fourier_coef_id = result.inserted_primary_key[0])
            frame = pd.DataFrame(metadata, index=[0])
            frame.to_sql(
                    'datapoints',
                    self.engine,
                    if_exists='append',
                    index=False,
                    )
            try:
                del self.df_table
            except AttributeError:
                pass
    
        def save_h5(self, kondo : KondoExport, filename : str = None, include='all', overwrite=False):
            """
            Save all data in given filename and keep metadata in database.
            """
            if filename is None:
                filename = os.path.join(settings.BASEPATH, settings.FILENAME)
            if not isinstance(kondo, KondoExport):
                kondo = KondoExport(kondo)
            filename = kondo.save_h5(filename, include, overwrite)
            self.insert_in_db(filename, kondo)
    
        def cache_df_table(self, min_version=(0,5,-1)):
            settings.logger.debug('DataManager: cache df_table')
            with self.engine.begin() as connection:
                df_table = pd.read_sql_table('datapoints', connection, index_col='id')
            selection = (df_table.solver_flags & DataManager.SOLVER_FLAGS['deleted']) == 0
            selection &= (df_table.version_major > min_version[0]) | ( (df_table.version_major == min_version[0]) & (df_table.version_minor >= min_version[1]) )
            selection &= df_table.energy_re == 0
            selection &= df_table.energy_im == 0
            if len(min_version) > 2 and min_version[2] > 0:
                selection &= df_table.git_commit_count >= min_version[2]
            self.df_table = df_table[selection]
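
        # min_version = (major, minor, commit_count): rows from versions older
        # than major.minor are dropped; a commit_count <= 0 disables the third
        # check.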
    
        def __getattr__(self, name):
            if name == 'df_table':
                self.cache_df_table()
                return self.df_table
            raise AttributeError("Unknown attribute name: %s"%name)
    
        def list(self, min_version=(14,0,-1,-1), **parameters):
            """
        Return a DataFrame of data points selected by physical parameters.
            """
            selection = (self.df_table.version_major > min_version[0]) | (self.df_table.version_major == min_version[0]) & (self.df_table.version_minor >= min_version[1])
            selection &= self.df_table.energy_re == 0
            selection &= self.df_table.energy_im == 0
            include_Ga = parameters.pop("include_Ga", None)
            if include_Ga:
                selection &= (self.df_table.solver_flags & DataManager.SOLVER_FLAGS["include_Ga"]) != 0
            elif include_Ga == False:
                selection &= (~self.df_table.solver_flags & DataManager.SOLVER_FLAGS["include_Ga"]) != 0
            truncation_order = parameters.pop("truncation_order", None)
            if truncation_order == 2:
                selection &= (self.df_table.solver_flags & DataManager.SOLVER_FLAGS["second_order_rg_equations"]) != 0
            elif truncation_order == 3:
                selection &= (self.df_table.solver_flags & DataManager.SOLVER_FLAGS["second_order_rg_equations"]) == 0
            if len(min_version) > 2 and min_version[2] > 0:
                selection &= self.df_table.git_commit_count >= min_version[2]
            method = parameters.pop("method", None)
            if method == "J":
                selection &= self.df_table.method != "mu"
            elif method is not None:
                selection &= self.df_table.method == method
            for key, value in parameters.items():
                if value is None:
                    continue
                try:
                    selection &= np.isclose(self.df_table[key], value, rtol=1e-6, atol=1e-15)
                except TypeError:
                    try:
                        selection &= self.df_table[key] == value
                    except KeyError:
                        settings.logger.warning("Unknown key: %s"%key)
            return self.df_table.loc[selection]
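
        # Usage sketch (parameter values illustrative; float parameters are
        # matched with np.isclose):
        #     dm = DataManager()
        #     df = dm.list(omega=10.0, vdc=0.0)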
    
        def load_all(self, **parameters):
            table = self.list(**parameters)
            for ((dirname, basename), subtable) in table.groupby(["dirname", "basename"]):
                try:
                    h5file = tb.open_file(os.path.join(dirname, basename))
                except:
                    settings.logger.exception("Error while loading HDF5 file")
                    continue
                #metadatatable = h5file.get_node('/metadata/mdtable')
                for index, row in subtable.iterrows():
                    try:
                        datanode = h5file.get_node("/data/" + row.hash)
                        #metadatarow = metadatatable.where("hash == '%s'"%(row.hash))
                        yield KondoImport(row, datanode, h5file, owns_h5file=subtable.shape[0]==1)
                    except:
                        settings.logger.exception("Error while loading data")
    
    def list_data(**kwargs):
        table = DataManager().list(**kwargs)
        print(table[['method', 'vdc', 'vac', 'omega', 'nmax', 'voltage_branches', 'padding', 'dc_current', 'dc_conductance', 'gamma', 'ac_current_abs']])