From d2e57344da775a41061b99ded5daa3dfccf1294c Mon Sep 17 00:00:00 2001
From: Carl Philipp Klemm <philipp@uvos.xyz>
Date: Tue, 27 Jun 2023 11:08:57 +0200
Subject: [PATCH] add remove duplicates function that deduplicates frequency
 entries in files, improve relaxis translator

---
 basicmath.cpp            |  13 +++
 eisgenerator/basicmath.h |   1 +
 eisgenerator/eistype.h   |  20 ++++
 eistype.cpp              | 210 ++++++++++++++++++++++-----------------
 model.cpp                |  13 +--
 strops.cpp               |  27 ++++-
 strops.h                 |   3 +
 test.cpp                 |  50 ++++++++--
 translators.cpp          |   6 +-
 9 files changed, 230 insertions(+), 113 deletions(-)

diff --git a/basicmath.cpp b/basicmath.cpp
index 389c55d..393e2a7 100644
--- a/basicmath.cpp
+++ b/basicmath.cpp
@@ -208,6 +208,19 @@ fvalue eis::maximumNyquistJump(const std::vector<eis::DataPoint>& data)
 	return maxDist;
 }
 
+// Sorts data by omega and keeps exactly one entry for every distinct frequency.
+void eis::removeDuplicates(std::vector<eis::DataPoint>& data)
+{
+	std::sort(data.begin(), data.end());
+
+	// DataPoint::operator== compares the impedance and not omega, so the frequency has to be
+	// compared explicitly; relying on operator== would never detect equal frequencies.
+	auto sameOmega = [](const eis::DataPoint& a, const eis::DataPoint& b){return a.omega == b.omega;};
+
+	// one pass over the sorted data instead of rescanning from begin() after every erase
+	data.erase(std::unique(data.begin(), data.end(), sameOmega), data.end());
+}
+
 bool eis::fvalueEq(fvalue a, fvalue b, fvalue epsilon)
 {
 	return a - epsilon < b && a + epsilon > b;
diff --git a/eisgenerator/basicmath.h b/eisgenerator/basicmath.h
index 0f7f40b..88eac48 100644
--- a/eisgenerator/basicmath.h
+++ b/eisgenerator/basicmath.h
@@ -17,6 +17,7 @@ namespace eis
 	fvalue nyquistAreaVariance(const std::vector<eis::DataPoint>& data, eis::DataPoint* centroid = nullptr);
 	fvalue maximumNyquistJump(const std::vector<eis::DataPoint>& data);
 	void noise(std::vector<eis::DataPoint>& data, double amplitude, bool relative);
+	void removeDuplicates(std::vector<eis::DataPoint>& data);
 	bool fvalueEq(fvalue a, fvalue b, fvalue epsilon = 0.001);
 }
 
diff --git a/eisgenerator/eistype.h b/eisgenerator/eistype.h
index 0f6df5a..a9f02df 100644
--- a/eisgenerator/eistype.h
+++ b/eisgenerator/eistype.h
@@ -1,6 +1,7 @@
 #pragma once
 #include <complex>
 #include <vector>
+#include <valarray>
 #include <cassert>
 #include <cmath>
 #include <filesystem>
@@ -17,6 +18,14 @@ public:
 	fvalue omega;
 	DataPoint() = default;
 	DataPoint(std::complex<fvalue> imIn, fvalue omegaIn = 0): im(imIn), omega(omegaIn){}
+	bool operator<(const DataPoint& in) const
+	{
+		return in.omega > omega; // orders by frequency only, the impedance im takes no part
+	}
+	bool operator>(const DataPoint& in) const
+	{
+		return in.omega < omega; // orders by frequency only, the impedance im takes no part
+	}
 	bool operator==(const DataPoint& in) const
 	{
 		return im == in.im;
@@ -138,6 +147,12 @@ public:
 
 class EisSpectra
 {
+public:
+	static constexpr int F_VERSION_MAJOR = 1;
+	static constexpr int F_VERSION_MINOR = 0;
+	static constexpr int F_VERSION_PATCH = 0;
+	static constexpr char F_MAGIC[] = "EISF";
+
 public:
 	std::vector<DataPoint> data;
 	std::string model;
@@ -155,7 +170,9 @@ public:
 			   std::vector<size_t> labelsIn, std::vector<std::string> labelNamesIn = std::vector<std::string>());
 	EisSpectra(const std::vector<DataPoint>& dataIn, const std::string& modelIn, const std::string& headerIn,
 			   size_t label, size_t maxLabel, std::vector<std::string> labelNamesIn = std::vector<std::string>());
+	EisSpectra(const std::filesystem::path& path){*this = loadFromDisk(path);}
 	EisSpectra(){}
+	static EisSpectra loadFromDisk(const std::filesystem::path& path);
 	void setLabel(size_t label, size_t maxLabel);
 	size_t getLabel();
 	void setSzLabels(std::vector<size_t> label);
@@ -164,12 +181,15 @@ public:
 	std::vector<size_t> getSzLabels() const;
 	bool isMulticlass();
 	std::vector<fvalue> getFvalueLabels();
+	bool saveToDisk(const std::filesystem::path& path) const;
 };
 
 bool saveToDisk(const EisSpectra& data, const std::filesystem::path& path);
 
 EisSpectra loadFromDisk(const std::filesystem::path& path);
 
+std::pair<std::valarray<fvalue>, std::valarray<fvalue>> eisToValarrays(const std::vector<eis::DataPoint>& b);
+
 fvalue eisDistance(const std::vector<eis::DataPoint>& a, const std::vector<eis::DataPoint>& b);
 
 fvalue eisNyquistDistance(const std::vector<eis::DataPoint>& a, const std::vector<eis::DataPoint>& b);
diff --git a/eistype.cpp b/eistype.cpp
index 7c6e944..06c0424 100644
--- a/eistype.cpp
+++ b/eistype.cpp
@@ -7,104 +7,20 @@
 
 #include "strops.h"
 #include "log.h"
+#include "basicmath.h"
 
 using namespace eis;
 
 bool eis::saveToDisk(const EisSpectra& data, const std::filesystem::path& path)
 {
-	std::fstream file;
-	file.open(path, std::ios_base::out | std::ios_base::trunc);
-	if(!file.is_open())
-	{
-		Log(Log::ERROR)<<"can not open "<<path<<" for writing\n";
-		return false;
-	}
-	file<<std::scientific;
-
-	file<<data.model<<(!data.header.empty() ? ", " : "");
-	file<<data.header;
-
-	if(!data.labels.empty())
-	{
-		if(!data.labelNames.empty())
-		{
-			file<<"\nlabelsNames\n";
-			std::string labelLine;
-			for(const std::string& name : data.labelNames)
-				labelLine += "\"" + name + "\", ";
-			labelLine.pop_back();
-			labelLine.pop_back();
-			file<<labelLine;
-		}
-		file<<"\nlabels\n";
-
-		std::string labelLine;
-		for(double label : data.labels)
-			labelLine += std::to_string(label) + ", ";
-		labelLine.pop_back();
-		labelLine.pop_back();
-		file<<labelLine;
-	}
-
-	file<<"\nomega, real, im\n";
-
-	for(const eis::DataPoint& point : data.data)
-		file<<point.omega<<", "<<point.im.real()<<", "<<point.im.imag()<<'\n';
-	file.close();
-	return true;
+	Log(Log::INFO)<<__func__<<" is deprecated";
+	return data.saveToDisk(path);
 }
 
 EisSpectra eis::loadFromDisk(const std::filesystem::path& path)
 {
-	EisSpectra out;
-	std::fstream file;
-	file.open(path, std::ios_base::in);
-	if(!file.is_open())
-		throw file_error("can not open " + path.string() + " for reading\n");
-
-	std::string line;
-	std::getline(file, line);
-	std::vector<std::string> tokens = tokenizeBinaryIgnore(line, ',', '"', '\\');
-	out.model = tokens[0];
-	line.erase(line.begin(), line.begin()+tokens.size());
-	out.header = line;
-
-	while(file.good())
-	{
-		std::getline(file, line);
-		if(line.starts_with("labelsNames"))
-		{
-			std::getline(file, line);
-			out.labelNames = tokenizeBinaryIgnore(line, ',', '"', '\\');
-			continue;
-		}
-		else if(line.starts_with("labels"))
-		{
-			std::getline(file, line);
-			std::vector<std::string> tokens = tokenizeBinaryIgnore(line, ',', '"', '\\');
-			for(const std::string& token : tokens)
-				out.labels.push_back(std::stod(token));
-			continue;
-		}
-		else if(line.empty() || line[0] == '#' || line.starts_with("omega"))
-		{
-			continue;
-		}
-		tokens = tokenize(line, ',');
-		if(tokens.size() != 3)
-			throw file_error("invalid line in " + path.string() + ": " + line);
-
-		#pragma GCC diagnostic push
-		#pragma GCC diagnostic ignored "-Wnarrowing"
-		if constexpr (std::is_same<fvalue, double>::value)
-			out.data.push_back(DataPoint({std::stod(tokens[1]), std::stod(tokens[2])}, std::stod(tokens[0])));
-		else
-			out.data.push_back(DataPoint({std::stof(tokens[1]), std::stof(tokens[2])}, std::stof(tokens[0])));
-		#pragma GCC diagnostic pop
-	}
-
-	file.close();
-	return out;
+	Log(Log::INFO)<<__func__<<" is deprecated";
+	return EisSpectra(path);
 }
 
 void eis::Range::print(int level) const
@@ -333,3 +249,119 @@ std::vector<fvalue> EisSpectra::getFvalueLabels()
 		return out;
 	}
 }
+
+// Writes the magic/version line, the quoted model plus header, the optional label sections and
+// one "omega, real, im" row per data point to path. Returns false if opening or writing fails.
+bool EisSpectra::saveToDisk(const std::filesystem::path& path) const
+{
+	std::fstream file;
+	file.open(path, std::ios_base::out | std::ios_base::trunc);
+	if(!file.is_open())
+	{
+		Log(Log::ERROR)<<"can not open "<<path<<" for writing\n";
+		return false;
+	}
+	file<<std::scientific;
+	file<<F_MAGIC<<", "<<F_VERSION_MAJOR<<'.'<<F_VERSION_MINOR<<'.'<<F_VERSION_PATCH<<'\n';
+	file<<'"'<<model<<'"'<<(!header.empty() ? ", " : "")<<header;
+
+	if(!labels.empty())
+	{
+		if(!labelNames.empty())
+		{
+			file<<"\nlabelsNames\n";
+			std::string labelLine;
+			for(const std::string& name : labelNames)
+				labelLine += "\"" + name + "\", ";
+			labelLine.erase(labelLine.size()-2); // drop the trailing ", "
+			file<<labelLine;
+		}
+		// labels go through std::to_string, so the std::scientific flag set above does not apply to them
+		file<<"\nlabels\n";
+		std::string labelLine;
+		for(double label : labels)
+			labelLine += std::to_string(label) + ", ";
+		labelLine.erase(labelLine.size()-2); // drop the trailing ", "
+		file<<labelLine;
+	}
+
+	file<<"\nomega, real, im\n";
+	for(const eis::DataPoint& point : data)
+		file<<point.omega<<", "<<point.im.real()<<", "<<point.im.imag()<<'\n';
+
+	// close() flushes; report a failed write (e.g. full disk) instead of always claiming success
+	file.close();
+	if(file.fail())
+		Log(Log::ERROR)<<"failed to write "<<path<<'\n';
+	return !file.fail();
+}
+
+// Parses an EISGenerator file from path into an EisSpectra. Throws file_error when the file can not
+// be opened, lacks the magic, was written by a newer format version or contains a malformed data row.
+EisSpectra EisSpectra::loadFromDisk(const std::filesystem::path& path)
+{
+	EisSpectra out;
+	std::fstream file;
+	file.open(path, std::ios_base::in);
+	if(!file.is_open())
+		throw file_error("can not open " + path.string() + " for reading\n");
+
+	// first line: magic and "major.minor.patch"; only versions newer than this reader are rejected
+	std::string line;
+	std::getline(file, line);
+	std::vector<std::string> tokens = tokenizeBinaryIgnore(line, ',', '"', '\\');
+	if(tokens.size() < 2 || tokens[0] != F_MAGIC)
+		throw file_error(path.string() + " is not a valid EISGenerator file");
+
+	std::vector<std::string> versionTokens = tokenize(tokens[1], '.');
+	if(versionTokens.size() != 3 || std::stoi(versionTokens[0]) > F_VERSION_MAJOR || (std::stoi(versionTokens[0]) == F_VERSION_MAJOR && std::stoi(versionTokens[1]) > F_VERSION_MINOR))
+		throw file_error(path.string() + " was saved by a newer version of EISGenerator, can not open");
+
+	// second line: quoted model string followed by the free-form header
+	std::getline(file, line);
+	tokens = tokenizeBinaryIgnore(line, ',', '"', '\\');
+	if(tokens.empty())
+		throw file_error(path.string() + " is missing the model line");
+	stripQuotes(tokens[0]);
+	out.model = tokens[0];
+	// NOTE(review): this drops tokens.size() characters (the token count), not the model token - confirm the intended offset
+	line.erase(0, tokens.size());
+	out.header = line;
+
+	while(file.good())
+	{
+		std::getline(file, line);
+		if(line.starts_with("labelsNames"))
+		{
+			std::getline(file, line);
+			out.labelNames = tokenizeBinaryIgnore(line, ',', '"', '\\');
+			continue;
+		}
+		else if(line.starts_with("labels"))
+		{
+			std::getline(file, line);
+			for(const std::string& token : tokenizeBinaryIgnore(line, ',', '"', '\\'))
+				out.labels.push_back(std::stod(token));
+			continue;
+		}
+		else if(line.empty() || line[0] == '#' || line.starts_with("omega"))
+		{
+			continue;
+		}
+		tokens = tokenize(line, ',');
+		if(tokens.size() != 3)
+			throw file_error("invalid line in " + path.string() + ": " + line);
+
+		#pragma GCC diagnostic push
+		#pragma GCC diagnostic ignored "-Wnarrowing"
+		if constexpr (std::is_same<fvalue, double>::value)
+			out.data.push_back(DataPoint({std::stod(tokens[1]), std::stod(tokens[2])}, std::stod(tokens[0])));
+		else
+			out.data.push_back(DataPoint({std::stof(tokens[1]), std::stof(tokens[2])}, std::stof(tokens[0])));
+		#pragma GCC diagnostic pop
+	}
+	file.close();
+	// deduplicate once over the complete spectrum instead of re-sorting the vector after every row
+	eis::removeDuplicates(out.data);
+	return out;
+}
diff --git a/model.cpp b/model.cpp
index ecf854f..24c1b23 100644
--- a/model.cpp
+++ b/model.cpp
@@ -150,18 +150,7 @@ Model::Model(const std::string& str, size_t paramSweepCount, bool defaultToRange
 {
 	size_t bracketCounter = 0;
 	std::string strCpy(str);
-	_model = nullptr;
-	try
-	{
-		_model = processBrackets(strCpy, bracketCounter, paramSweepCount, defaultToRange);
-	}
-	catch(const parse_errror& err)
-	{
-		Log(Log::ERROR)<<err.what();
-		if(_model != nullptr)
-			delete _model;
-		_model = nullptr;
-	}
+	_model = processBrackets(strCpy, bracketCounter, paramSweepCount, defaultToRange);
 }
 
 Model::Model(const Model& in)
diff --git a/strops.cpp b/strops.cpp
index a410a4d..96354b1 100644
--- a/strops.cpp
+++ b/strops.cpp
@@ -83,12 +83,22 @@ std::vector<std::string> tokenizeBinaryIgnore(const std::string& str, const char
 	return tokens;
 }
 
-size_t opposingBraket(const std::string& str, size_t index, char bracketChar)
+size_t opposingBraket(const std::string& str, size_t index, char closeBracketChar)
 {
+	char openBracket = str[index];
+	int counter = 0;
 	for(size_t i = index; i < str.size(); ++i)
 	{
-		if(str[i] == bracketChar)
-			return i;
+		if(str[i] == openBracket)
+		{
+			++counter;
+		}
+		else if(str[i] == closeBracketChar)
+		{
+			--counter;
+			if(counter < 1)
+				return i;
+		}
 	}
 	return std::string::npos;
 }
@@ -126,11 +136,22 @@ size_t deepestBraket(const std::string& str, std::string bracketChars, size_t* l
 	return deepestPos;
 }
 
+void stripQuotes(std::string& in) // removes every '"' and '\'' character from in, in place
+{
+	in.erase(std::remove_if(in.begin(), in.end(), [](unsigned char ch){return ch == '"' || ch == '\'';}), in.end()); // erase-remove: the range form also covers "no quote" and "several quotes"
+}
+
 size_t eisRemoveUnneededBrackets(std::string& in, long int bracketStart)
 {
 	bool bracketNeeded = false;
 	size_t paramBracketCount = 0;
 
+	if(bracketStart == 0 && opposingBraket(in, bracketStart, ')') == in.size()-1 )
+	{
+		in.pop_back();
+		in.erase(in.begin());
+	}
+
 	for(size_t i = (bracketStart >= 0 ? bracketStart+1 : 0); i < in.size(); ++i)
 	{
 		if(paramBracketCount == 0)
diff --git a/strops.h b/strops.h
index 93305fb..c43a87a 100644
--- a/strops.h
+++ b/strops.h
@@ -2,6 +2,7 @@
 #include <string>
 #include <vector>
 #include <sstream>
+#include <algorithm>
 
 std::vector<std::string> tokenize(const std::string& str, const char delim = ' ', const char ignBracketStart = '\0',
 								  const char ignBracketEnd = '\0', const char escapeChar = '\0');
@@ -16,4 +17,6 @@ char getOpposingBracketChar(const char ch);
 
 std::string stripWhitespace(const std::string& in);
 
+void stripQuotes(std::string& in);
+
 size_t eisRemoveUnneededBrackets(std::string& in, long int bracketStart = -1);
diff --git a/test.cpp b/test.cpp
index c64fc46..27cee14 100644
--- a/test.cpp
+++ b/test.cpp
@@ -1,3 +1,4 @@
+#include <cmath>
 #include <iostream>
 #include <complex>
 #include <chrono>
@@ -254,7 +255,7 @@ static bool modelConsistancy()
 
 static bool uneededBrackets()
 {
-	std::string tst("(c-(rc)-(r-cr))");
+	std::string tst("(c-(rc)-(r-c(r)))");
 	eisRemoveUnneededBrackets(tst);
 	if(tst == "c-rc-(r-cr)")
 	{
@@ -299,6 +300,40 @@ static bool nyquistJump()
 	return eis::fvalueEq(aVar, 0.178183);
 }
 
+// Loads an optional reference spectrum, regenerates it from its model string and checks that
+// the nyquist distance between both is a number. A missing reference file skips the test.
+static bool testEisNyquistDistance()
+{
+	const std::filesystem::path filePath("./relaxis_rp-rp_0.csv");
+	eis::EisSpectra spectra;
+	if(std::filesystem::exists(filePath)) // the path constructor throws if the file can not be opened
+		spectra = eis::EisSpectra(filePath);
+	if(spectra.data.empty())
+	{
+		eis::Log(eis::Log::INFO)<<__func__<<" Unable to load "<<filePath<<" skiping test";
+		return true;
+	}
+
+	std::vector<fvalue> omega(spectra.data.size());
+	for(size_t i = 0; i < spectra.data.size(); ++i)
+		omega[i] = spectra.data[i].omega;
+	eis::Log(eis::Log::INFO)<<__func__<<" using model string: "<<spectra.model;
+	eis::Model model(spectra.model);
+
+	std::vector<eis::DataPoint> genData = model.executeSweep(omega);
+	fvalue dist = eisNyquistDistance(spectra.data, genData);
+	if(std::isnan(dist))
+	{
+		eis::Log(eis::Log::ERROR)<<__func__<<" spectra.data:";
+		printDataVect(spectra.data);
+		eis::Log(eis::Log::ERROR)<<__func__<<" genData:";
+		printDataVect(genData);
+		eis::Log(eis::Log::ERROR)<<__func__<<" distanece is NAN!";
+		return false;
+	}
+	return true;
+}
+
 static bool testTranslators()
 {
 	const std::string boukamp("R(RP)");
@@ -354,20 +389,23 @@ int main(int argc, char** argv)
 	if(!testDistance())
 		return 4;
 
-	if(!runNormalize())
+	if(!testEisNyquistDistance())
 		return 5;
 
-	if(!nyquistVariance())
+	if(!runNormalize())
 		return 6;
 
-	if(!nyquistJump())
+	if(!nyquistVariance())
 		return 7;
 
-	if(!testTranslators())
+	if(!nyquistJump())
 		return 8;
 
-	if(!testMadapParams())
+	if(!testTranslators())
 		return 9;
 
+	if(!testMadapParams())
+		return 10;
+
 	return 0;
 }
diff --git a/translators.cpp b/translators.cpp
index a568142..0b96031 100644
--- a/translators.cpp
+++ b/translators.cpp
@@ -128,8 +128,6 @@ std::string relaxisToEis(const std::string& in, const std::vector<double>& param
 		}
 	}
 
-	std::cout<<work<<std::endl;
-
 	for(size_t i = 0; i < work.size(); ++i)
 	{
 		if(isValidSymbol(std::string(1, work[i]), eisRelaxisTable, true))
@@ -157,8 +155,10 @@ std::string relaxisToEis(const std::string& in, const std::vector<double>& param
 		{
 			if(isValidSymbol(std::string(1, out[i]), eisRelaxisTable, false))
 			{
+				Log::Level oldLevel = Log::level;
+				Log::level =  Log::ERROR;
 				Componant* componant = Componant::createNewComponant(out[i]);
-				std::cout<<"componant "<<out[i]<<" has "<<componant->paramCount()<<" params\n";
+				Log::level =  oldLevel;
 				if(componant->paramCount() > 0)
 				{
 					std::stringstream paramstream;
-- 
GitLab