# data_management.py
# Copyright 2022 Valentin Bruch <valentin.bruch@rwth-aachen.de>
# License: MIT
"""
Kondo FRTRG, data management module
This file contains functions and classes to manage data generated using the
kondo module.
General concepts:
* All metadata are stored in an SQL database.
* Floquet matrices are stored in HDF5 files.
* Each HDF5 file can contain multiple data points. Data points can be added to
HDF5 files.
* Each HDF5 file contains a table of metadata for the data points stored in
this file.
* Data points are identified in HDF5 files by a hash generated from their full
Floquet matrices at the end of the RG flow.
* The SQL database stores the directory, filename, and hash where the Floquet
matrices are stored.
Implementation:
* pandas for accessing the SQL database and managing the full table of metadata
* pytables for HDF5 files
* a file "filename.lock" is temporarily created when writing to a HDF5 file.
"""
import os
import tables as tb
import pathlib
from time import sleep
from datetime import datetime
import numpy as np
import pandas as pd
import sqlalchemy as db
import random
import warnings
import settings
# Hashes are used as identifiers for data points in HDF5 files. These hashes
# are often not valid Python names, which causes PyTables to emit a
# NaturalNameWarning. This warning is deliberately ignored.
warnings.simplefilter("ignore", tb.NaturalNameWarning)
def random_string(length : int):
    """
    Return a random string of `length` characters taken from 0-9, A-Z, a-z.

    Exactly one call to random.randint(0, 61) is made per character, so the
    result is reproducible for a seeded random module.
    """
    # Index 0-9 -> digits, 10-35 -> upper case, 36-61 -> lower case.
    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    return "".join(alphabet[random.randint(0, 61)] for _ in range(length))
def replace_all(string:str, replacements:dict):
    """
    Return `string` with every key of `replacements` replaced by its value.

    The substitutions are applied one after another in the iteration order
    of the dictionary, so later substitutions act on the output of earlier
    ones.
    """
    result = string
    for pattern in replacements:
        result = result.replace(pattern, replacements[pattern])
    return result
class KondoExport:
"""
Class for saving Kondo object to file.
Example usage:
>>> kondo = Kondo(...)
>>> kondo.run(...)
>>> KondoExport(kondo).save_h5("data/frtrg-01.h5")
"""
# Allowed values of the "method" column in the HDF5 metadata table.
METHOD_ENUM = tb.Enum(('unknown', 'mu', 'J', 'J-compact-1', 'J-compact-2', 'mu-reference', 'J-reference', 'mu-extrap-voltage', 'J-extrap-voltage'))
# Allowed values of the "solver_method" column in the HDF5 metadata table.
SOLVER_METHOD_ENUM = tb.Enum(('unknown', 'RK45', 'RK23', 'DOP853', 'Radau', 'BDF', 'LSODA', 'other'))
def __init__(self, kondo):
"""
Wrap the Kondo object `kondo` whose data shall be exported.
"""
self.kondo = kondo
@property
def hash(self):
    """
    Identifier of the data point: first 40 characters of self.kondo.hash().

    The value is computed on first access and cached in self._hash.
    """
    if not hasattr(self, "_hash"):
        self._hash = self.kondo.hash()[:40]
    return self._hash
@property
def metadata(self):
    """
    Return a newly built dictionary of metadata for the wrapped Kondo object.

    A fresh dictionary is created on every access. It contains the hash,
    physical parameters, version information, solver options and the
    integer `solver_flags`, which is a bitwise OR of entries of
    DataManager.SOLVER_FLAGS.
    """
    kondo = self.kondo

    # Label of the method used for the calculation
    if not kondo.unitary_transformation:
        method = 'mu'
    elif kondo.compact == 2:
        method = 'J-compact-2'
    elif kondo.compact == 1:
        method = 'J-compact-1'
    else:
        method = 'J'

    # Flags taken from optional boolean attributes of the Kondo object.
    # A missing attribute is treated like a false value.
    solver_flags = 0
    for attribute in ("simplified_initial_conditions", "include_Ga", "solve_integral_exactly"):
        if getattr(kondo, attribute, False):
            solver_flags |= DataManager.SOLVER_FLAGS[attribute]

    # Truncation order 3 sets no flag, order 2 sets a flag, everything else
    # is reported. A missing attribute is silently accepted.
    try:
        order = kondo.truncation_order
    except AttributeError:
        pass
    else:
        if order == 2:
            solver_flags |= DataManager.SOLVER_FLAGS["second_order_rg_equations"]
        elif order != 3:
            settings.logger.warning("Invalid truncation order: %s"%order)

    # Every true-valued global setting whose lower-case name is a known flag
    for key, value in kondo.global_settings.items():
        if value:
            solver_flags |= DataManager.SOLVER_FLAGS.get(key.lower(), 0)

    version = kondo.global_settings["VERSION"]
    solveopts = getattr(kondo, 'solveopts', {})
    return {
        "hash": self.hash,
        "omega": kondo.omega,
        "energy": kondo.energy,
        "version_major": version[0],
        "version_minor": version[1],
        "lazy_inverse_factor": kondo.global_settings["LAZY_INVERSE_FACTOR"],
        "git_commit_count": version[2],
        "git_commit_id": version[3],
        "method": method,
        "timestamp": datetime.utcnow().timestamp(),
        "solver_method": solveopts.get('method', 'unknown'),
        "solver_tol_abs": solveopts.get('atol', -1),
        "solver_tol_rel": solveopts.get('rtol', -1),
        "integral_method": getattr(kondo, "integral_method", -15),
        "d": kondo.d,
        "vdc": kondo.vdc,
        "vac": kondo.vac,
        "xL": kondo.xL,
        "nmax": kondo.nmax,
        "padding": kondo.padding,
        "voltage_branches": kondo.voltage_branches,
        "resonant_dc_shift": kondo.resonant_dc_shift,
        "solver_flags": solver_flags,
    }
@property
def main_results(self):
    """
    dictionary of main results: gamma, DC current, DC conductance, AC
    current (absolute value and phase)

    Every entry defaults to NaN and is only replaced if the corresponding
    quantity can be read from the Kondo object. Reading is best effort:
    ordinary exceptions are ignored and leave the NaN in place, while
    KeyboardInterrupt and SystemExit propagate.
    """
    results = dict(
        gamma = np.nan,
        dc_current = np.nan,
        dc_conductance = np.nan,
        ac_current_abs = np.nan,
        ac_current_phase = np.nan
    )
    kondo = self.kondo
    nmax = kondo.nmax
    vb = kondo.voltage_branches
    if kondo.compact == 0:
        try:
            # As in data(): gamma is indexed with a leading voltage branch
            # index only if voltage branches are used.
            if vb:
                results['gamma'] = kondo.gamma[vb, nmax, nmax].real
            else:
                results['gamma'] = kondo.gamma[nmax, nmax].real
        except Exception:
            pass
        try:
            results['dc_current'] = kondo.gammaL[nmax, nmax].real
        except Exception:
            pass
        try:
            results['dc_conductance'] = kondo.deltaGammaL[nmax, nmax].real
        except Exception:
            pass
        if nmax == 0:
            results['ac_current_abs'] = 0
        else:
            try:
                element = kondo.gammaL[nmax-1, nmax]
                results['ac_current_abs'] = np.abs(element)
                results['ac_current_phase'] = np.angle(element)
            except Exception:
                pass
    elif kondo.compact:
        results['dc_current'] = 0
        half = nmax // 2
        if nmax % 2:
            try:
                results['gamma'] = kondo.gamma.submatrix11[half, half].real
            except Exception:
                pass
            try:
                results['dc_conductance'] = kondo.deltaGammaL.submatrix11[half, half].real
            except Exception:
                pass
            try:
                element = kondo.gammaL.submatrix01[half, half]
                results['ac_current_abs'] = np.abs(element)
                results['ac_current_phase'] = np.angle(element)
            except Exception:
                pass
        else:
            try:
                results['gamma'] = kondo.gamma.submatrix00[half, half].real
            except Exception:
                pass
            try:
                results['dc_conductance'] = kondo.deltaGammaL.submatrix00[half, half].real
            except Exception:
                pass
            try:
                element = kondo.gammaL.submatrix10[half-1, half]
                results['ac_current_abs'] = np.abs(element)
                results['ac_current_phase'] = np.angle(element)
            except Exception:
                pass
    return results
@property
def fourier_coef(self):
    """
    Fourier coefficients of the wrapped Kondo object (self.kondo.fourier_coef).
    """
    return self.kondo.fourier_coef
def data(self, include='all'):
    """
    dictionary of Floquet matrices as numpy arrays.
    Argument include takes the following values:
    "all": save all data (Floquet matrices including voltage shifts)
    "reduced": exclude voltage shifts and yL
    "observables": save only gamma, gammaL, deltaGammaL, excluding voltage
        shifts
    "minimal": save only central column of Floquet matrices for gamma,
        gammaL, deltaGammaL, excluding voltage shifts

    Raises ValueError for any other value of include.
    """
    kondo = self.kondo
    # Kondo objects without the attribute include_Ga are treated as if it
    # was false, consistent with the metadata property.
    include_ga = getattr(kondo, "include_Ga", False)
    if include == 'all':
        save = dict(
            gamma = kondo.gamma.values,
            z = kondo.z.values,
            gammaL = kondo.gammaL.values,
            deltaGammaL = kondo.deltaGammaL.values,
            deltaGamma = kondo.deltaGamma.values,
            yL = kondo.yL.values,
            g2 = kondo.g2.to_numpy_array(),
            g3 = kondo.g3.to_numpy_array(),
            current = kondo.current.to_numpy_array(),
        )
        if include_ga:
            save["ga"] = kondo.ga.to_numpy_array()
    elif include == 'reduced':
        vb = kondo.voltage_branches
        if vb:
            # Keep only the entry with voltage branch index vb
            save = dict(
                gamma = kondo.gamma[vb],
                z = kondo.z[vb],
                gammaL = kondo.gammaL.values,
                deltaGammaL = kondo.deltaGammaL.values,
                deltaGamma = kondo.deltaGamma[min(vb,1)],
                g2 = kondo.g2.to_numpy_array()[:,:,vb],
                g3 = kondo.g3.to_numpy_array()[:,:,vb],
                current = kondo.current.to_numpy_array(),
            )
            if include_ga:
                save["ga"] = kondo.ga.to_numpy_array()[:,:,vb]
        else:
            save = dict(
                gamma = kondo.gamma.values,
                z = kondo.z.values,
                gammaL = kondo.gammaL.values,
                deltaGammaL = kondo.deltaGammaL.values,
                deltaGamma = kondo.deltaGamma.values,
                g2 = kondo.g2.to_numpy_array(),
                g3 = kondo.g3.to_numpy_array(),
                current = kondo.current.to_numpy_array(),
            )
            if include_ga:
                save["ga"] = kondo.ga.to_numpy_array()
    elif include == 'observables':
        vb = kondo.voltage_branches
        save = dict(
            gamma = kondo.gamma[vb] if vb else kondo.gamma.values,
            gammaL = kondo.gammaL.values,
            deltaGammaL = kondo.deltaGammaL.values,
        )
    elif include == 'minimal':
        nmax = kondo.nmax
        vb = kondo.voltage_branches
        save = dict(
            gamma = kondo.gamma[vb,:,nmax] if vb else kondo.gamma[:,nmax],
            gammaL = kondo.gammaL[:,nmax],
            deltaGammaL = kondo.deltaGammaL[:,nmax],
        )
    else:
        # %-formatting keeps the message for strings and also works for
        # values that are not strings.
        raise ValueError("Unknown value for include: %s" % (include,))
    return save
def save_npz(self, filename, include="all"):
    """
    Write metadata and the arrays selected by `include` (see data()) to
    `filename` in numpy's binary .npz format.
    """
    header = self.metadata
    arrays = self.data(include)
    np.savez(filename, **header, **arrays)
def save_h5(self, filename, include='all', overwrite=False):
    """
    Save data in HDF5 file.
    Returns absolute path to filename where data have been saved.

    While writing, a lock file "<filename>.lock" exists. If the lock file
    is already present, this function waits until it disappears. Pressing
    Ctrl-C while waiting offers to ignore the lock or to save to a file
    name extended by a random string.

    If the file already contains a data point with the same hash, the data
    are saved to a new file whose name is extended by a random string.
    The argument overwrite is only passed on to recursive calls and has no
    further effect here.

    include is passed to data(). For include != "all" the "reduced" bit is
    set in the stored solver_flags.
    """
    lock_filename = filename + '.lock'
    # Acquire the lock: touch() fails if the lock file already exists.
    while True:
        try:
            pathlib.Path(lock_filename).touch(exist_ok=False)
            break
        except FileExistsError:
            try:
                settings.logger.warning('File %s is locked, waiting ~0.5s'%filename)
                sleep(0.4 + 0.2*random.random())
            except KeyboardInterrupt:
                answer = input('Ignore lock file? Then type "yes": ')
                if answer.lower() == "yes":
                    break
                answer = input('Save with filename extended by random string? (Yn): ')
                # An empty answer selects the default "yes".
                if not answer.lower().startswith("n"):
                    return self.save_h5(filename.removesuffix(".h5") + random_string(8) + ".h5", include, overwrite)
    try:
        file_exists = os.path.exists(filename)
        h5file = None
        while h5file is None:
            try:
                h5file = tb.open_file(filename, "a", MAX_NUMEXPR_THREADS=1, MAX_BLOSC_THREADS=1)
            except tb.exceptions.HDF5ExtError:
                settings.logger.warning('Error opening file %s, waiting 0.5s'%filename)
                sleep(0.5)
        try:
            if file_exists:
                try:
                    # Raises NoSuchNodeError if the hash is not yet in the file
                    h5file.is_visible_node('/data/' + self.hash)
                    new_filename = filename.removesuffix(".h5") + random_string(8) + ".h5"
                    settings.logger.warning("Hash exists in file %s! Saving to %s"%(filename, new_filename))
                    return self.save_h5(new_filename, include, overwrite)
                except tb.exceptions.NoSuchNodeError:
                    pass
                metadata_table = h5file.get_node("/metadata/mdtable")
            else:
                # create new file
                metadata_parent = h5file.create_group(h5file.root, "metadata", "Metadata")
                metadata_table = h5file.create_table(metadata_parent,
                        'mdtable',
                        dict(
                            idnum = tb.Int32Col(),
                            hash = tb.StringCol(40),
                            omega = tb.Float64Col(),
                            energy = tb.ComplexCol(16),
                            version_major = tb.Int16Col(),
                            version_minor = tb.Int16Col(),
                            git_commit_count = tb.Int16Col(),
                            git_commit_id = tb.Int32Col(),
                            timestamp = tb.Time64Col(),
                            method = tb.EnumCol(KondoExport.METHOD_ENUM, 'unknown', 'int8'),
                            solver_method = tb.EnumCol(KondoExport.SOLVER_METHOD_ENUM, 'unknown', 'int8'),
                            solver_tol_abs = tb.Float64Col(),
                            solver_tol_rel = tb.Float64Col(),
                            integral_method = tb.Int16Col(),
                            d = tb.Float64Col(),
                            vdc = tb.Float64Col(),
                            vac = tb.Float64Col(),
                            xL = tb.Float64Col(),
                            nmax = tb.Int16Col(),
                            padding = tb.Int16Col(),
                            voltage_branches = tb.Int16Col(),
                            resonant_dc_shift = tb.Int16Col(),
                            solver_flags = tb.Int16Col(),
                            lazy_inverse_factor = tb.Float64Col(),
                        )
                    )
                h5file.create_group(h5file.root, "data", "Floquet matrices")
                h5file.flush()

            # Save metadata
            row = metadata_table.row
            row['idnum'] = metadata_table.shape[0]
            # self.metadata builds a new dictionary on every access, so the
            # flag must be set on the dictionary that is actually stored.
            metadata = self.metadata
            if include != "all":
                metadata["solver_flags"] |= DataManager.SOLVER_FLAGS["reduced"]
            row['method'] = KondoExport.METHOD_ENUM[metadata.pop('method')]
            row['solver_method'] = KondoExport.SOLVER_METHOD_ENUM[metadata.pop('solver_method')]
            for key, value in metadata.items():
                try:
                    row[key] = value
                except KeyError:
                    # Existing files may lack columns for newer metadata
                    pass
            row.append()

            # save data
            datagroup = h5file.create_group("/data/", self.hash)
            for key, value in self.data(include).items():
                h5file.create_array(datagroup, key, value)
            if self.fourier_coef is not None:
                h5file.create_array(datagroup, "fourier_coef", np.array(self.fourier_coef))
            h5file.flush()
        finally:
            h5file.close()
    finally:
        try:
            os.remove(lock_filename)
        except FileNotFoundError:
            # The lock may be gone already if it was ignored on request
            pass
    return os.path.abspath(filename)
class KondoImport:
"""
Class for importing Kondo objects that were saved with KondoExport.
Example usage:
>>> kondo, = KondoImport.read_from_h5("data/frtrg-01.h5", "94f81d2b49df15912798d95cae8e108d75c637c2")
>>> print(kondo.gammaL[kondo.nmax, kondo.nmax])
Metadata entries and arrays stored in the data node are accessible both as
items (kondo["nmax"]) and as attributes (kondo.nmax).
"""
def __init__(self, metadata, datanode, h5file, owns_h5file=False):
"""
metadata: mapping of metadata names to values
datanode: HDF5 node containing the arrays of this data point
h5file: open HDF5 file to which datanode belongs
owns_h5file: if true, h5file is closed when this object is deleted
"""
self.metadata = metadata
self._datanode = datanode
self._h5file = h5file
self._owns_h5file = owns_h5file
def __del__(self):
if self._owns_h5file:
settings.logger.info("closing h5file")
self._h5file.close()
@classmethod
def read_from_h5(cls, filename, khash):
    """
    Generator yielding a KondoImport object for every row of the metadata
    table in HDF5 file `filename` whose hash equals `khash`.

    The file is closed immediately if the nodes cannot be found or if no
    row matches. If exactly one object was yielded, that object closes the
    file when it is deleted. If more than one object was yielded, the file
    is not closed automatically. This bookkeeping happens only once the
    generator has been exhausted.
    """
    h5file = tb.open_file(filename, "r")
    try:
        datanode = h5file.get_node('/data/' + khash)
        metadatatable = h5file.get_node('/metadata/mdtable')
    except Exception:
        # Nobody else holds a reference to the file: do not leak the handle
        h5file.close()
        raise
    counter = 0
    item = None
    for row in metadatatable.where(f"hash == '{khash}'"):
        metadata = {key:row[key] for key in metadatatable.colnames}
        metadata.pop("idnum", None)
        metadata["hash"] = row["hash"].decode()
        metadata["method"] = KondoExport.METHOD_ENUM(row["method"])
        metadata["solver_method"] = KondoExport.SOLVER_METHOD_ENUM(row["solver_method"])
        item = cls(metadata, datanode, h5file)
        item._rawmetadata = row
        yield item
        counter += 1
    if counter == 0:
        h5file.close()
    elif counter == 1:
        item._owns_h5file = True
    else:
        settings.logger.warning("h5file will not be closed automatically")
@classmethod
def read_all_from_h5(cls, filename):
    """
    Generator yielding a KondoImport object for every row of the metadata
    table in HDF5 file `filename`.

    The file is closed immediately if the metadata table cannot be found
    or if it is empty. If exactly one object was yielded, that object
    closes the file when it is deleted. If more than one object was
    yielded, the file is not closed automatically. This bookkeeping happens
    only once the generator has been exhausted.
    """
    h5file = tb.open_file(filename)
    try:
        metadatatable = h5file.get_node('/metadata/mdtable')
    except Exception:
        # Nobody else holds a reference to the file: do not leak the handle
        h5file.close()
        raise
    counter = 0
    item = None
    for row in metadatatable:
        metadata = {key:row[key] for key in metadatatable.colnames}
        metadata.pop("idnum", None)
        metadata["hash"] = row["hash"].decode()
        metadata["method"] = KondoExport.METHOD_ENUM(row["method"])
        metadata["solver_method"] = KondoExport.SOLVER_METHOD_ENUM(row["solver_method"])
        datanode = h5file.get_node("/data/" + metadata["hash"])
        item = cls(metadata, datanode, h5file)
        item._rawmetadata = row
        yield item
        counter += 1
    if counter == 0:
        h5file.close()
    elif counter == 1:
        item._owns_h5file = True
    else:
        settings.logger.warning("h5file will not be closed automatically")
@property
def main_results(self):
"""
dictionary of main results: DC current, DC conductance, AC current (absolute value and phase)
"""
results = dict(
gamma = np.nan,
dc_current = np.nan,
dc_conductance = np.nan,
ac_current_abs = np.nan,
ac_current_phase = np.nan
)
nmax = self.nmax
if self.method in ('unknown', 'mu', 'J', 'mu-reference', 'J-reference', 'mu-extrap-voltage', 'J-extrap-voltage'):
voltage_branches = self.voltage_branches
try:
results['dc_current'] = self.gammaL[nmax, nmax].real
except:
pass
try:
results['dc_conductance'] = self.deltaGammaL[nmax, nmax].real
except:
pass
try:
gamma = self._datanode["gamma"].read()
if gamma.ndim == 3:
results['gamma'] = gamma[voltage_branches, nmax, nmax].real
elif gamma.ndim == 2:
results['gamma'] = gamma[nmax, nmax].real
else:
results['gamma'] = gamma[nmax].real
except:
pass
if nmax == 0:
results['ac_current_abs'] = 0
else:
try:
results['ac_current_abs'] = np.abs(self.gammaL[nmax-1, nmax])
results['ac_current_phase'] = np.angle(self.gammaL[nmax-1, nmax])
except:
pass
elif self.method in ("J-compact-1", "J-compact-2"):
results['dc_current'] = 0
if nmax % 2:
try:
results['gamma'] = self.gamma.submatrix11[nmax//2, nmax//2].real
except:
pass
try:
results['dc_conductance'] = self.deltaGammaL.submatrix11[nmax//2, nmax//2].real
except:
pass
try:
results['ac_current_abs'] = np.abs(self.gammaL.submatrix01[nmax//2, nmax//2])
results['ac_current_phase'] = np.angle(self.gammaL.submatrix01[nmax//2, nmax//2])
except:
pass
else:
try:
results['gamma'] = self.gamma.submatrix00[nmax//2, nmax//2].real
except:
pass
try:
results['dc_conductance'] = self.deltaGammaL.submatrix00[nmax//2, nmax//2].real
except:
pass
try:
results['ac_current_abs'] = np.abs(self.gammaL.submatrix10[nmax//2-1, nmax//2])
results['ac_current_phase'] = np.angle(self.gammaL.submatrix10[nmax//2-1, nmax//2])
except:
pass
return results
@property
def fourier_coef(self):
if "fouier_coef" in self._datanode:
return self._datanode.fourier_coef.read()
return None
def __getitem__(self, name):
if name in self.metadata:
return self.metadata[name]
if name in self._datanode:
return self._datanode[name].read()
raise KeyError("Unknown key: %s"%name)
def __getattr__(self, name):
if name in self.metadata:
return self.metadata[name]
if name in self._datanode:
return self._datanode[name].read()
raise AttributeError("Unknown attribute name: %s"%name)
class DataManager:
"""
Database structure
tables:
datapoints (single data point)
fourier_coef (up to 12 Fourier coefficients fc00 ... fc11, referenced
by the column fourier_coef_id of datapoints)
"""
# Bit masks which are combined by bitwise OR into the integer stored in
# the column "solver_flags".
SOLVER_FLAGS = dict(
contains_flow = 0x0001,
reduced = 0x0002,
deleted = 0x0004,
simplified_initial_conditions = 0x0008,
enforce_symmetric = 0x0010,
check_symmetries = 0x0020,
ignore_symmetries = 0x0040,
extrapolate_voltage = 0x0080,
use_cublas = 0x0100,
use_reference_implementation = 0x0200,
second_order_rg_equations = 0x0400,
solve_integral_exactly = 0x0800,
include_Ga = 0x1000,
)
def __init__(self):
    """
    Connect to the database given by settings.DB_CONNECTION_STRING and
    load the tables "datapoints" and "fourier_coef". Tables which do not
    exist yet are created.
    """
    self.version = settings.VERSION
    self.engine = db.create_engine(settings.DB_CONNECTION_STRING, future=True, echo=False)
    self.metadata = db.MetaData()
    try:
        # autoload_with alone triggers reflection of the table. The
        # separate flag autoload=True is deprecated in SQLAlchemy 1.4 and
        # was removed in 2.0.
        self.table = db.Table('datapoints', self.metadata, autoload_with=self.engine)
    except db.exc.NoSuchTableError:
        with self.engine.begin() as connection:
            settings.logger.info('Creating database table datapoints')
            self.table = db.Table(
                    'datapoints',
                    self.metadata,
                    db.Column('id', db.INTEGER(), primary_key=True),
                    db.Column('hash', db.CHAR(40)),
                    db.Column('version_major', db.SMALLINT()),
                    db.Column('version_minor', db.SMALLINT()),
                    db.Column('git_commit_count', db.SMALLINT()),
                    db.Column('git_commit_id', db.INTEGER()),
                    db.Column('timestamp', db.TIMESTAMP()),
                    db.Column('method', db.Enum('unknown', 'mu', 'J', 'J-compact-1', 'J-compact-2', 'mu-reference', 'J-reference')),
                    db.Column('solver_method', db.Enum('unknown', 'RK45', 'RK23', 'DOP853', 'Radau', 'BDF', 'LSODA', 'other')),
                    db.Column('solver_tol_abs', db.FLOAT()),
                    db.Column('solver_tol_rel', db.FLOAT()),
                    db.Column('omega', db.FLOAT()),
                    db.Column('d', db.FLOAT()),
                    db.Column('vdc', db.FLOAT()),
                    db.Column('vac', db.FLOAT()),
                    db.Column('xL', db.FLOAT()),
                    db.Column('energy_re', db.FLOAT()),
                    db.Column('energy_im', db.FLOAT()),
                    db.Column('lazy_inverse_factor', db.FLOAT()),
                    db.Column('dc_current', db.FLOAT()),
                    db.Column('ac_current_abs', db.FLOAT()),
                    db.Column('ac_current_phase', db.FLOAT()),
                    db.Column('dc_conductance', db.FLOAT()),
                    db.Column('gamma', db.FLOAT()),
                    db.Column('nmax', db.SMALLINT()),
                    db.Column('padding', db.SMALLINT()),
                    db.Column('voltage_branches', db.SMALLINT()),
                    db.Column('resonant_dc_shift', db.SMALLINT()),
                    db.Column('solver_flags', db.SMALLINT()),
                    db.Column('integral_method', db.SMALLINT()),
                    db.Column('dirname', db.String(256)),
                    db.Column('basename', db.String(128)),
                    db.Column('fourier_coef_id', db.INTEGER(), default=-1),
                )
            self.table.create(bind=connection)
    try:
        self.fourier_coef_table = db.Table('fourier_coef', self.metadata, autoload_with=self.engine)
    except db.exc.NoSuchTableError:
        with self.engine.begin() as connection:
            settings.logger.info('Creating database table fourier_coef')
            self.fourier_coef_table = db.Table(
                    'fourier_coef',
                    self.metadata,
                    db.Column('id', db.INTEGER(), primary_key=True),
                    # Columns fc00, fc01, ..., fc11
                    *(db.Column('fc%02d'%i, db.FLOAT(), default=0.) for i in range(12)),
                )
            self.fourier_coef_table.create(bind=connection)
def insert_from_h5file(self, filename):
    """
    Scan data in HDF5 file and insert datasets in database if they are not
    included yet.

    A data point counts as included if the cached table contains an entry
    with the same hash and the same file path. Fourier coefficients (at
    most the first 12) are stored in the table fourier_coef and referenced
    by fourier_coef_id, which is -1 if there are none. The cached table is
    discarded afterwards.
    """
    filename = os.path.abspath(filename)
    dirname = os.path.dirname(filename)
    basename = os.path.basename(filename)
    datasets = []
    skipped = 0
    dataset = None
    for dataset in KondoImport.read_all_from_h5(filename):
        settings.logger.debug("Checking hash=" + dataset.hash)
        candidates = self.df_table.loc[self.df_table.hash == dataset.hash]
        if candidates.shape[0] > 0:
            settings.logger.debug("Found %d times the same hash"%candidates.shape[0])
            exists = any(
                    os.path.join(candidate.dirname, candidate.basename) == filename
                    for _, candidate in candidates.iterrows())
            if exists:
                settings.logger.debug("Entry exists, skipping")
                skipped += 1
                continue
            settings.logger.debug("File seems new, continuing anyway")
        metadata = dataset.metadata.copy()
        energy = metadata.pop('energy')
        metadata.update(
                energy_re = energy.real,
                energy_im = energy.imag,
                timestamp = datetime.fromtimestamp(metadata.pop("timestamp")).isoformat().replace('T', ' '),
                dirname = dirname,
                basename = basename,
                fourier_coef_id = -1,
            )
        metadata.update(dataset.main_results)
        fourier_coef = dataset.fourier_coef
        if fourier_coef is not None:
            stmt = self.fourier_coef_table.insert().values({'fc%02d'%i:f for i,f in enumerate(fourier_coef[:12])})
            with self.engine.begin() as connection:
                result = connection.execute(stmt)
                connection.commit()
            # inserted_primary_key is a row of primary key values; the
            # table has the single primary key column "id".
            metadata.update(fourier_coef_id = result.inserted_primary_key[0])
        datasets.append(metadata)
    # If exactly one data point was read, it owns the file and closes it on
    # deletion. Otherwise close the file here (best effort).
    if dataset is not None:
        try:
            if not dataset._owns_h5file:
                dataset._h5file.close()
                settings.logger.info("Closed HDF5 file")
        except Exception:
            settings.logger.exception("Error while closing HDF5 file")
    settings.logger.info("Inserting %d new entries, ignoring %d"%(len(datasets), skipped))
    if datasets:
        new_frame = pd.DataFrame(datasets)
        new_frame.to_sql(
                'datapoints',
                self.engine,
                if_exists = 'append',
                index = False,
            )
    # Discard the cached table. It does not exist if the file contained no
    # data points.
    try:
        del self.df_table
    except AttributeError:
        pass
def insert_in_db(self, filename : str, kondo : KondoExport):
    """
    Save metadata in database for data stored in filename.

    Metadata and main results of `kondo` are written as one row to the
    table datapoints. Fourier coefficients (at most the first 12) are
    stored in the table fourier_coef and referenced by fourier_coef_id,
    which is -1 if there are none. The cached table is discarded
    afterwards.
    """
    metadata = kondo.metadata
    metadata.update(kondo.main_results)
    energy = metadata.pop('energy')
    metadata.update(
            energy_re = energy.real,
            energy_im = energy.imag,
            timestamp = datetime.fromtimestamp(metadata.pop("timestamp")).isoformat().replace('T', ' '),
            dirname = os.path.dirname(filename),
            basename = os.path.basename(filename),
            # Same default as in insert_from_h5file and in the table
            # definition
            fourier_coef_id = -1,
        )
    fourier_coef = kondo.fourier_coef
    if fourier_coef is not None:
        stmt = self.fourier_coef_table.insert().values({'fc%02d'%i:f for i,f in enumerate(fourier_coef[:12])})
        with self.engine.begin() as connection:
            result = connection.execute(stmt)
            connection.commit()
        # inserted_primary_key is a row of primary key values; the table
        # has the single primary key column "id".
        metadata.update(fourier_coef_id = result.inserted_primary_key[0])
    frame = pd.DataFrame(metadata, index=[0])
    frame.to_sql(
            'datapoints',
            self.engine,
            if_exists='append',
            index=False,
        )
    # Discard the cached table if there is one
    try:
        del self.df_table
    except AttributeError:
        pass
def save_h5(self, kondo : KondoExport, filename : str = None, include='all', overwrite=False):
    """
    Write the data of `kondo` to an HDF5 file and register the data point
    in the database.

    If no filename is given, settings.BASEPATH joined with
    settings.FILENAME is used. `kondo` is wrapped in KondoExport unless it
    already is a KondoExport instance.
    """
    target = os.path.join(settings.BASEPATH, settings.FILENAME) if filename is None else filename
    exporter = kondo if isinstance(kondo, KondoExport) else KondoExport(kondo)
    saved_path = exporter.save_h5(target, include, overwrite)
    self.insert_in_db(saved_path, exporter)
def cache_df_table(self, min_version=(0,5,-1)):
    """
    Read the table datapoints from the database and store the selected rows
    as pandas DataFrame in self.df_table.

    Selected are rows which are not flagged as deleted, whose version is at
    least min_version[:2], and whose energy is exactly zero. If
    min_version has a positive third entry, git_commit_count must not be
    smaller than this entry.
    """
    settings.logger.debug('DataManager: cache df_table')
    with self.engine.begin() as connection:
        full_table = pd.read_sql_table('datapoints', connection, index_col='id')
    major, minor = min_version[0], min_version[1]
    not_deleted = (full_table.solver_flags & DataManager.SOLVER_FLAGS['deleted']) == 0
    recent_enough = (full_table.version_major > major) | ( (full_table.version_major == major) & (full_table.version_minor >= minor) )
    zero_energy = (full_table.energy_re == 0) & (full_table.energy_im == 0)
    keep = not_deleted & recent_enough & zero_energy
    if len(min_version) > 2 and min_version[2] > 0:
        keep &= full_table.git_commit_count >= min_version[2]
    self.df_table = full_table[keep]
def __getattr__(self, name):
    """
    Fallback attribute lookup which creates the cached table df_table on
    first access. Any other unknown attribute raises AttributeError.
    """
    if name == 'df_table':
        self.cache_df_table()
        return self.df_table
    # Without this, every unknown attribute would silently evaluate to None
    raise AttributeError("%r object has no attribute %r"%(type(self).__name__, name))
def list(self, min_version=(14,0,-1,-1), **parameters):
    """
    Return DataFrame with selection of physical parameters.

    Only rows with version >= min_version[:2] and with zero energy are
    selected. If min_version has a positive third entry, git_commit_count
    must not be smaller than this entry.
    Special keyword arguments:
        include_Ga: True or False selects rows with or without that solver
            flag, None (default) does not filter
        truncation_order: 2 or 3; any other value does not filter
        method: "J" selects all methods except "mu"; any other value
            selects exactly this method
    All other keyword arguments are names of columns. Numerical values are
    compared with a small tolerance, other values must be equal. Arguments
    with value None are ignored. Unknown column names are ignored with a
    warning.
    """
    table = self.df_table
    flags = table.solver_flags
    selection = (table.version_major > min_version[0]) | ((table.version_major == min_version[0]) & (table.version_minor >= min_version[1]))
    selection &= table.energy_re == 0
    selection &= table.energy_im == 0
    include_Ga = parameters.pop("include_Ga", None)
    if include_Ga:
        selection &= (flags & DataManager.SOLVER_FLAGS["include_Ga"]) != 0
    elif include_Ga == False:
        # explicit False (not None): select rows where the flag is not set
        selection &= (flags & DataManager.SOLVER_FLAGS["include_Ga"]) == 0
    truncation_order = parameters.pop("truncation_order", None)
    if truncation_order == 2:
        selection &= (flags & DataManager.SOLVER_FLAGS["second_order_rg_equations"]) != 0
    elif truncation_order == 3:
        selection &= (flags & DataManager.SOLVER_FLAGS["second_order_rg_equations"]) == 0
    if len(min_version) > 2 and min_version[2] > 0:
        selection &= table.git_commit_count >= min_version[2]
    method = parameters.pop("method", None)
    if method == "J":
        selection &= table.method != "mu"
    elif method is not None:
        selection &= table.method == method
    for key, value in parameters.items():
        if value is None:
            continue
        # Look up the column first so that an unknown name is reported
        # instead of raising KeyError.
        try:
            column = table[key]
        except KeyError:
            settings.logger.warning("Unknown key: %s"%key)
            continue
        try:
            selection &= np.isclose(column, value, rtol=1e-6, atol=1e-15)
        except TypeError:
            # not numerical: require equality
            selection &= column == value
    return table.loc[selection]
def load_all(self, **parameters):
    """
    Generator yielding a KondoImport object for every data point selected
    by list(**parameters).

    Files which cannot be opened and data points which cannot be loaded
    are logged and skipped. A yielded object owns (and eventually closes)
    its HDF5 file only if it is the only selected data point in this file.
    """
    table = self.list(**parameters)
    for ((dirname, basename), subtable) in table.groupby(["dirname", "basename"]):
        try:
            h5file = tb.open_file(os.path.join(dirname, basename))
        except Exception:
            settings.logger.exception("Error while loading HDF5 file")
            continue
        owns_h5file = subtable.shape[0] == 1
        for index, row in subtable.iterrows():
            try:
                datanode = h5file.get_node("/data/" + row.hash)
                item = KondoImport(row, datanode, h5file, owns_h5file=owns_h5file)
            except Exception:
                settings.logger.exception("Error while loading data")
                continue
            # Yield outside of the try block: closing the generator raises
            # GeneratorExit at the yield, which must not be caught here.
            yield item
def list_data(**kwargs):
    """
    Print the most important columns of the data points selected by
    DataManager().list(**kwargs).
    """
    table = DataManager().list(**kwargs)
    print(table[['method', 'vdc', 'vac', 'omega', 'nmax', 'voltage_branches', 'padding', 'dc_current', 'dc_conductance', 'gamma', 'ac_current_abs']])