import argparse
import os
from tqdm import tqdm
import tarfile

from chargefile import ChargeFile
from spectrafile import SpectraFile


# File names starting with one of these prefixes are treated as charge/discharge sequences.
_CHARGE_PREFIXES = ("charge", "single_cell_")
# Lives in the data directory but is neither a spectrum nor a charge file.
_LOG_FILENAME = "expiramentlog.csv"


def _parse_spectra_filename(filename):
	"""Return ``(step, cellid, substep)`` parsed from a spectra file name.

	The part of the name before the first ``.`` is split on ``-``; its first
	three fields are read as integers, in the order step, cell id, substep.

	Raises:
		ValueError: if fewer than three fields are present or one of them is
			not an integer. The offending file name is included in the message.
	"""
	tokens = filename.split('.')[0].split('-')
	try:
		return int(tokens[0]), int(tokens[1]), int(tokens[2])
	except (IndexError, ValueError) as err:
		raise ValueError(f"unexpected spectra file name: {filename!r}") from err


def _archive_and_remove(directory):
	"""Pack every entry of *directory* into ``<directory>.tar`` and delete the directory.

	An already existing archive of that name is replaced. Entries are stored
	in the archive under their bare file name.
	"""
	# Normalise first: with a trailing separator ("out/") the archive would
	# otherwise be named "out/.tar" and end up inside the directory it packs.
	directory = os.path.normpath(directory)
	tar_path = f"{directory}.tar"

	try:
		os.remove(tar_path)
	except FileNotFoundError:
		pass

	# The context manager closes the archive even if adding or removing a file fails.
	with tarfile.open(tar_path, mode="x") as tar:
		for filename in tqdm(os.listdir(directory)):
			path = os.path.join(directory, filename)
			tar.add(path, arcname=filename)
			# Delete right after archiving so archive and originals never
			# both occupy the disk in full.
			os.remove(path)
	os.rmdir(directory)


def main():
	"""Convert a directory of spectra and charge files into a tar dataset.

	Reads the files in ``--data``, writes one output per spectrum into
	``--out`` via ``SpectraFile.write`` and finally replaces the ``--out``
	directory with ``<out>.tar``.
	"""
	parser = argparse.ArgumentParser("KissExpiramentCreateDataset")
	parser.add_argument('--data', '-d', required=True, help="Data input directory")
	parser.add_argument('--out', '-o', required=True, help="output directory")
	args = parser.parse_args()

	filenames = [f for f in os.listdir(args.data) if os.path.isfile(os.path.join(args.data, f))]
	charge_filenames = [f for f in filenames if f.startswith(_CHARGE_PREFIXES)]
	spectra_filenames = [
		f for f in filenames
		if not f.startswith(_CHARGE_PREFIXES) and f != _LOG_FILENAME
	]

	print(f"found {len(spectra_filenames)} spectra")
	print(f"found {len(charge_filenames)} charge/discharge sequences")

	os.makedirs(args.out, exist_ok=True)

	charge_files = [ChargeFile(os.path.join(args.data, filename)) for filename in charge_filenames]

	# Parse every name once up front: the total number of distinct cells must
	# be known before the first SpectraFile is constructed.
	parsed = [(filename, *_parse_spectra_filename(filename)) for filename in tqdm(spectra_filenames)]
	cells = {cellid for _, _, cellid, _ in parsed}

	print(f"{len(cells)} cells where involved")

	for filename, step, cellid, substep in tqdm(parsed):
		sf = SpectraFile(os.path.join(args.data, filename), cellid, step, substep, charge_files, len(cells))
		sf.write(args.out)

	_archive_and_remove(args.out)


if __name__ == "__main__":
	main()