diff --git a/scripts/cellmeta.py b/scripts/cellmeta.py
index 33d4bb28a91b803b1f5a9a194a1559b49799e4aa..f9fe0df71640572c1061207877d56e08a0bd765f 100644
--- a/scripts/cellmeta.py
+++ b/scripts/cellmeta.py
@@ -85,7 +85,8 @@ class CellMeta:
 	def __init__(self, cellid: int, globalstep: int, substep: int, charge_files: list[ChargeFile], total_cells: int):
 		closest_avg = ChargeFile.FindClosest(charge_files, globalstep, -1)
 		closest_charge = ChargeFile.FindClosest(charge_files, globalstep, cellid)
-		assert closest_charge.cell == cellid
+		if closest_charge is not None:
+			assert closest_charge.cell == cellid
 
 		self.charge_cycles = charge_cycles_at_step(globalstep, substep) if cellid not in non_charge_cycle_cell else 0
 		self.thermal_cycles = thermal_cycles_at_step(globalstep, substep) if cellid not in non_thermal_cycle_cell else 0
@@ -94,3 +95,4 @@ class CellMeta:
 		self.last_cap = abs(closest_charge.capacity) if closest_charge is not None else -1
 		self.last_cap_step = closest_charge.step if closest_charge is not None else -1
 		self.thermal_range = cell_thermal_range[cellid]
+		self.soc = ChargeFile.GetSoc(charge_files, globalstep, cellid, total_cells)
diff --git a/scripts/chargefile.py b/scripts/chargefile.py
index 0dd053bd81e1628327270370b7cbfe80220d57a0..567c80fcda983ab01642d5e0c787b9a587799125 100644
--- a/scripts/chargefile.py
+++ b/scripts/chargefile.py
@@ -20,12 +20,12 @@ def calc_capacity(charge_curve: list[dict]):
 
 class ChargeFile:
 	def __init__(self, filename: str):
-		self.start_voltage = 0
-		self.end_voltage = 0
-		self.capacity = 0
+		self.start_voltage = 0.0
+		self.end_voltage = 0.0
+		self.capacity = 0.0
 		self.cell = -1
 		self.discharge = False
-		self.current = 0
+		self.current = 0.0
 		self.full_cycle = False
 		self.step = 0
 
@@ -54,11 +54,14 @@ class ChargeFile:
 			self.capacity = calc_capacity(charge_curve)
 			self.full_cycle = self.start_voltage > 4.05 and self.end_voltage < 3.15 or self.start_voltage < 3.15 and self.end_voltage > 4.05
 
-	def FindClosest(charge_files: list, step, cellid=-1, full_cycle=True):
+	@staticmethod
+	def FindClosest(charge_files: list, step: int, cellid: int = -1, full_cycle=True, only_before=False):
 		closest_file = None
 		for charge_file in charge_files:
 			if charge_file.cell != cellid:
 				continue
+			if only_before and charge_file.step > step:
+				continue
 			if not full_cycle or charge_file.full_cycle:
 				if closest_file is not None:
 					if abs(step - closest_file.step) > abs(step - charge_file.step):
@@ -68,3 +71,36 @@ class ChargeFile:
 		if closest_file is None:
 			print(f"Warning could not find a charge {"full" if full_cycle else ""} file close to {step} for cell {cellid}")
 		return closest_file
+
+	@staticmethod
+	def GetSoc(charge_files: list, step: int, cellid: int, cell_count: int) -> float:
+		"""Estimate the state of charge of cell ``cellid`` at ``step``.
+
+		The latest full cycle not after ``step`` (cell-specific or common, i.e. cell == -1)
+		is the reference; capacities of later charge files up to ``step`` are summed and
+		divided by the reference capacity. Common files are split evenly over ``cell_count``.
+		Returns -1.0 when no reference file exists or its capacity is zero.
+		"""
+		common_closest_full = ChargeFile.FindClosest(charge_files, step, -1, True, True)
+		specific_closest_full = ChargeFile.FindClosest(charge_files, step, cellid, True, True)
+
+		# Common first: max() keeps the first maximal item, so a tie in step picks the common file.
+		candidates = [f for f in (common_closest_full, specific_closest_full) if f is not None]
+		if not candidates:
+			return -1.0
+		closest_full = max(candidates, key=lambda f: f.step)
+
+		charge_counter = 0.0
+		for charge_file in charge_files:
+			if not closest_full.step < charge_file.step <= step:
+				continue
+			if charge_file.cell == -1:
+				charge_counter += charge_file.capacity / cell_count
+			elif charge_file.cell == cellid:
+				# Charge moved through other cells must not count towards this cell.
+				charge_counter += charge_file.capacity
+
+		full_cap = closest_full.capacity / cell_count if closest_full.cell == -1 else closest_full.capacity
+		if full_cap == 0:
+			return -1.0  # avoid ZeroDivisionError; report "unknown" like the no-reference case
+		return abs(charge_counter) / abs(full_cap)
diff --git a/scripts/createdataset.py b/scripts/createdataset.py
index 9493e416e3c96dd2fde2eba2f733e1e7b9f84467..a7d126dedc44dd977c22288db0400fa5a65f6da1 100644
--- a/scripts/createdataset.py
+++ b/scripts/createdataset.py
@@ -10,7 +10,6 @@ if __name__ == "__main__":
 	parser = argparse.ArgumentParser("KissExpiramentCreateDataset")
 	parser.add_argument('--data', '-d', required=True, help="Data input directory")
 	parser.add_argument('--out', '-o', required=True, help="output directory")
-	parser.add_argument('--cell_count', '-c', type=int, required=True, help="number of active cells")
 	args = parser.parse_args()
 
 	filenames = [f for f in os.listdir(args.data) if os.path.isfile(os.path.join(args.data, f))]
@@ -28,19 +27,21 @@ if __name__ == "__main__":
 		charge_files.append(ChargeFile(os.path.join(args.data, filename)))
 
 	cells = set()
+	for filename in tqdm(spectra_filenames):
+		tokens = filename.split('.')[0].split('-')
+		cellid = int(tokens[1])
+		cells.add(cellid)
+
+	print(f"{len(cells)} cells where involved")
 
 	for filename in tqdm(spectra_filenames):
 		tokens = filename.split('.')[0].split('-')
 		step = int(tokens[0])
 		cellid = int(tokens[1])
 		substep = int(tokens[2])
-		cells.add(cellid)
 		celldir = os.path.join(args.out, str(cellid))
 		if not os.path.exists(celldir):
 			os.makedirs(celldir)
-		sf = SpectraFile(os.path.join(args.data, filename), cellid, step, substep, charge_files, args.cell_count)
+		sf = SpectraFile(os.path.join(args.data, filename), cellid, step, substep, charge_files, len(cells))
 		sf.write(celldir)
 
-	if len(cells) != int(args.cell_count):
-		print(f"INCORRECT CELL COUNT!! found {len(cells)} but expected {args.cell_count}")
-
diff --git a/scripts/spectrafile.py b/scripts/spectrafile.py
index 7cdadb67349746c0ea166e9763c313a511f9c9ca..3475e1e2278054152493928771fdead8c8924771 100644
--- a/scripts/spectrafile.py
+++ b/scripts/spectrafile.py
@@ -12,8 +12,8 @@ class SpectraFile:
 		self.step = step
 		self.substep = substep
 		self.filename = filename
-		self.temperature = -1
-		self.ocv = -1
+		self.temperature = -1.0
+		self.ocv = -1.0
 		self.meta = CellMeta(cellid, step, substep, charge_files, total_cells)
 		self.filename = os.path.split(filename)[1]
 
@@ -26,9 +26,9 @@ class SpectraFile:
 			raise ParseError(f"file name and file content of SpectraFile {filename} do not match")
 
 	def write(self, directory: str):
-		meta_dsc_string = "step, substep, cellid, temparature, ocv, charge_cycles, thermal_cycles, last_avg_cap, last_avg_step, last_cap, last_cap_step"
+		meta_dsc_string = "step, substep, cellid, temparature, ocv, charge_cycles, thermal_cycles, last_avg_cap, last_avg_step, last_cap, last_cap_step, soc"
 		metastring = f"{self.step}, {self.substep}, {self.cellid}, {self.temperature}, {self.ocv}, {self.meta.charge_cycles}, {self.meta.thermal_cycles}, "
-		metastring += f"{self.meta.last_avg_cap}, {self.meta.last_avg_cap_step}, {self.meta.last_cap}, {self.meta.last_cap_step}"
+		metastring += f"{self.meta.last_avg_cap}, {self.meta.last_avg_cap_step}, {self.meta.last_cap}, {self.meta.last_cap_step}, {self.meta.soc}"
 
 		self.spectra.headerDescription = meta_dsc_string
 		self.spectra.header = metastring