Something went wrong on our end
Select Git revision
-
Christoph von Oy authored
createdataset.py 1.80 KiB
import argparse
import os
from tqdm import tqdm
import tarfile
from chargefile import ChargeFile
from spectrafile import SpectraFile
def parse_spectra_filename(filename: str) -> tuple:
    """Split a spectra file name into ``(step, cellid, substep)`` integers.

    The name is cut at its first ``.`` and the remaining stem is split on
    ``-``; the first three fields are converted to ``int``. Any further
    fields are ignored.

    Args:
        filename: Bare file name (no directory part) of a spectra file.

    Returns:
        A ``(step, cellid, substep)`` tuple of integers.

    Raises:
        ValueError: If the stem has fewer than three ``-`` separated fields
            or one of the first three fields is not an integer.
    """
    tokens = filename.split('.')[0].split('-')
    try:
        step, cellid, substep = (int(token) for token in tokens[:3])
    except ValueError as err:
        # Name the offending file: the data directory may contain files that
        # are not spectra, and a bare int() error does not say which one.
        raise ValueError(
            f"cannot parse step-cellid-substep from spectra file name {filename!r}"
        ) from err
    return step, cellid, substep


def archive_path_for(out_dir: str) -> str:
    """Return the path of the ``.tar`` archive that sits next to *out_dir*.

    Trailing path separators are stripped first, so ``"out/"`` maps to
    ``"out.tar"`` instead of ``"out/.tar"`` (which would place the archive
    inside the very directory that is being packed and then deleted).
    """
    separators = os.sep + (os.altsep or "")
    trimmed = out_dir.rstrip(separators)
    # Fall back to the raw value for paths made only of separators (e.g. "/").
    return f"{trimmed or out_dir}.tar"


def pack_output_directory(out_dir: str) -> str:
    """Move every entry of *out_dir* into a tar archive and remove *out_dir*.

    An existing archive at the target path is replaced. Each entry is stored
    under its bare file name and deleted from disk right after it was added.

    Args:
        out_dir: Directory whose contents are archived.

    Returns:
        The path of the archive that was written.
    """
    archive_path = archive_path_for(out_dir)
    try:
        os.remove(archive_path)
    except FileNotFoundError:
        pass
    # The context manager guarantees the archive is closed even if adding or
    # removing one of the entries fails part-way through.
    with tarfile.open(archive_path, mode="x") as tar:
        for filename in tqdm(os.listdir(out_dir)):
            path = os.path.join(out_dir, filename)
            tar.add(path, arcname=filename)
            os.remove(path)
    os.rmdir(out_dir)
    return archive_path


def main() -> None:
    """Build the dataset from ``--data``, write it to ``--out`` and tar it.

    Files in the data directory whose names start with ``charge`` or
    ``single_cell_`` are loaded as charge files; every other file except
    ``expiramentlog.csv`` is treated as a spectra file. Each spectra file is
    written to the output directory, which is then packed into
    ``<out>.tar`` and removed.
    """
    parser = argparse.ArgumentParser("KissExpiramentCreateDataset")
    parser.add_argument('--data', '-d', required=True, help="Data input directory")
    parser.add_argument('--out', '-o', required=True, help="output directory")
    args = parser.parse_args()

    filenames = [
        f for f in os.listdir(args.data)
        if os.path.isfile(os.path.join(args.data, f))
    ]
    charge_prefixes = ("charge", "single_cell_")
    charge_filenames = [f for f in filenames if f.startswith(charge_prefixes)]
    spectra_filenames = [
        f for f in filenames
        if not f.startswith(charge_prefixes) and f != "expiramentlog.csv"
    ]

    print(f"found {len(spectra_filenames)} spectra")
    print(f"found {len(charge_filenames)} charge/discharge sequences")

    os.makedirs(args.out, exist_ok=True)

    charge_files = [
        ChargeFile(os.path.join(args.data, filename))
        for filename in charge_filenames
    ]

    # Parse every name once up front: the total cell count must be known
    # before the first SpectraFile is constructed.
    parsed = [
        (filename, *parse_spectra_filename(filename))
        for filename in tqdm(spectra_filenames)
    ]
    cells = {cellid for _, _, cellid, _ in parsed}
    print(f"{len(cells)} cells where involved")

    for filename, step, cellid, substep in tqdm(parsed):
        sf = SpectraFile(
            os.path.join(args.data, filename),
            cellid, step, substep, charge_files, len(cells),
        )
        sf.write(args.out)

    pack_output_directory(args.out)


if __name__ == "__main__":
    main()