Upload initial code

e208e8b7 · Andreas Kämper
Commit e208e8b7 authored Aug 24, 2021 by Andreas Kämper
--- a/automog/code/modeling/clustering.py
+++ b/automog/code/modeling/clustering.py
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Feb 24 20:47:10 2021
+@author: niels
+"""
+import pandas as pd 
+from sklearn.linear_model import LinearRegression
+from sklearn.mixture import GaussianMixture
+from sklearn.cluster import KMeans, AgglomerativeClustering
+import numpy as np
+import copy
+import time
+class Clusterer(object):
+    """Cluster the operating points of one component state into several states.
+
+    ``CombiClustering`` is the entry point. ``GM``, ``Kmeans``,
+    ``AggloClustering`` and ``MultiRC`` each wrap one clustering algorithm and
+    return a list with one ``DataFrame`` per cluster.
+    """
+
+    def __init__(self):
+        pass
+
+    def CombiClustering(self, operating_points, component, method, number_of_clusters,
+                        state=0, max_runtime=600, i=10, patience=10):
+        """Split one state of one component into clusters.
+
+        Parameters
+        ----------
+        operating_points : nested mapping; ``operating_points[component][state]``
+            must be the ``DataFrame`` to cluster.
+        component, state : keys selecting that ``DataFrame``.
+        method : 'Gaussian Mixture', 'Regression Clustering',
+            'KMeans Clustering' or 'Agglomerative Clustering'. Any other value
+            leaves the operating points unchanged.
+        number_of_clusters : requested number of clusters.
+        max_runtime : time budget in seconds counted from this call; only
+            forwarded to 'Regression Clustering'.
+        i, patience : number of restarts and patience forwarded to ``MultiRC``.
+
+        Returns
+        -------
+        A deep copy of ``operating_points``. The first cluster replaces the
+        selected state, every further cluster is stored under a new key.
+        """
+        new_operating_points = copy.deepcopy(operating_points)
+        data = operating_points[component][state]
+        # MultiRC/RC expect an absolute point in time, not a duration.
+        deadline = time.time() + max_runtime
+
+        if method == 'Gaussian Mixture':
+            new_states = self.GM(data, number_of_clusters, columns=data.columns)
+        elif method == 'Regression Clustering':
+            new_states = self.MultiRC(data, number_of_clusters, deadline, i, patience,
+                                      columns=data.columns)
+        elif method == 'KMeans Clustering':
+            new_states = self.Kmeans(data, number_of_clusters, columns=data.columns)
+        elif method == 'Agglomerative Clustering':
+            new_states = self.AggloClustering(data, number_of_clusters, columns=data.columns)
+        else:
+            new_states = []
+
+        # NOTE(review): the new keys assume the existing states are keyed
+        # 0..n-1; with other keys an existing state could be overwritten.
+        n_existing = len(operating_points[component])
+        for k, new_state in enumerate(new_states):
+            if k == 0:
+                # the first cluster replaces the state that was clustered
+                new_operating_points[component][state] = new_state
+            else:
+                # all further clusters are appended as additional states
+                new_operating_points[component][n_existing + k - 1] = new_state
+        return new_operating_points
+
+    @staticmethod
+    def _frames_from_labels(values, labels, number_of_clusters, columns=None):
+        """Return one DataFrame per cluster holding the rows of ``values`` with that label."""
+        labels = np.asarray(labels)
+        return [pd.DataFrame(values[labels == k], columns=columns)
+                for k in range(number_of_clusters)]
+
+    @staticmethod
+    def _fit_cluster_models(X, y, labels, number_of_clusters):
+        """Fit one linear regression per cluster; return the models and the summed R^2."""
+        models = []
+        total_score = 0.0
+        for k in range(number_of_clusters):
+            members = labels == k
+            n_members = int(members.sum())
+            if n_members == 0:
+                # nothing to fit; the cluster cannot attract points either
+                models.append(None)
+                continue
+            model = LinearRegression().fit(X[members], y[members])
+            models.append(model)
+            # R^2 is undefined (NaN) for fewer than two samples; count it as 0
+            # so that the total stays comparable.
+            if n_members >= 2:
+                total_score += model.score(X[members], y[members])
+        return models, total_score
+
+    @staticmethod
+    def _assign_to_best_model(X, y, models):
+        """Return for every point the index of the model with the smallest squared error."""
+        errors = np.full((len(models), len(y)), np.inf)
+        for k, model in enumerate(models):
+            if model is not None:
+                errors[k] = (model.predict(X) - y) ** 2
+        # argmin keeps the first model on ties
+        return errors.argmin(axis=0)
+
+    def GM(self, data, number_of_clusters, columns=None):
+        """Cluster the rows of ``data`` with a Gaussian mixture model.
+
+        Returns a list of ``number_of_clusters`` DataFrames (possibly empty
+        ones) labelled with ``columns``. No random seed is fixed, so results
+        may differ between calls.
+        """
+        gm = GaussianMixture(n_components=number_of_clusters)
+        labels = gm.fit_predict(data)
+        return self._frames_from_labels(data.values, labels, number_of_clusters, columns)
+
+    def Kmeans(self, data, number_of_clusters, columns=None):
+        """Cluster the rows of ``data`` with k-means (fixed ``random_state=0``).
+
+        Returns a list of ``number_of_clusters`` DataFrames labelled with
+        ``columns``.
+        """
+        KM = KMeans(number_of_clusters, random_state=0)
+        labels = KM.fit_predict(data)
+        return self._frames_from_labels(data.values, labels, number_of_clusters, columns)
+
+    def AggloClustering(self, data, number_of_clusters, columns=None):
+        """Cluster the rows of ``data`` with agglomerative clustering.
+
+        If ``number_of_clusters`` is 1 the estimator is created with its
+        default settings (scikit-learn's default is ``n_clusters=2``) and one
+        frame per label found is returned; otherwise exactly
+        ``number_of_clusters`` frames are returned.
+        """
+        if number_of_clusters == 1:
+            AG = AgglomerativeClustering()
+            labels = AG.fit_predict(data)
+            n_frames = int(max(labels)) + 1
+        else:
+            AG = AgglomerativeClustering(number_of_clusters)
+            labels = AG.fit_predict(data)
+            n_frames = number_of_clusters
+        return self._frames_from_labels(data.values, labels, n_frames, columns)
+
+    def MultiRC(self, data, number_of_clusters, max_runtime=600, i=50, patience=10,
+                maxIterations=5000, columns=None):
+        """Run regression clustering ``i`` times and keep the best result.
+
+        Parameters
+        ----------
+        data, number_of_clusters, patience, maxIterations, columns :
+            forwarded to ``RC``.
+        max_runtime : absolute deadline as returned by ``time.time()`` (not a
+            duration; ``CombiClustering`` does the conversion). The default of
+            600 lies in the past, so direct callers must pass a deadline.
+        i : number of randomly initialised runs.
+
+        Returns
+        -------
+        The cluster list of the run with the highest total R^2. If the very
+        first run already hits the time/iteration limit, ``[data]`` is
+        returned; if a later run hits it, the best result so far is returned.
+        """
+        # -inf instead of 0 so that a run with a non-positive total score is
+        # still kept when nothing better exists.
+        best_TRS = -np.inf
+        clusters = []
+        limit_reached = False
+        count = 0
+        for j in range(i):
+            limit_reached, temp_clusters, temp_TRS = self.RC(
+                data, number_of_clusters, max_runtime, limit_reached,
+                patience, maxIterations, columns)
+            if limit_reached:
+                if count > 0:
+                    print("Time or Iteration Limit reached. Clustering canceled.")
+                    return clusters
+                return [data]
+            if temp_TRS > best_TRS:
+                best_TRS = temp_TRS
+                clusters = temp_clusters
+            count += 1
+            print("Iteration",j," completed")
+        print("Clustering completed")
+        return clusters
+
+    def RC(self, data, number_of_clusters, max_runtime=600, limit=False, patience=10,
+           maxIterations=5000, columns=None):
+        """Run one randomly initialised regression clustering.
+
+        Every cluster is described by a linear regression of the output column
+        on all other columns. Points are repeatedly re-assigned to the model
+        with the smallest squared prediction error, then the models are
+        refitted and scored by the sum of their R^2 values (TRS).
+
+        Parameters
+        ----------
+        data : DataFrame. The last column whose label contains '(out)' is the
+            regression target (column 1 if there is none); all other columns
+            are regressors.
+        number_of_clusters : number of regression models.
+        max_runtime : absolute deadline as returned by ``time.time()``.
+        limit : value passed through as ``limit_reached`` on success.
+        patience : total number of non-improving iterations after which the
+            best assignment found so far is returned.
+        maxIterations : upper bound on the number of iterations.
+        columns : column labels for the returned frames.
+
+        Returns
+        -------
+        ``(limit_reached, clusters, TRS)``. On success ``clusters`` holds one
+        DataFrame per cluster with the rows of ``data`` in their original
+        column order. If the deadline or ``maxIterations`` is hit first,
+        ``limit_reached`` is True and ``clusters`` is empty.
+
+        Raises
+        ------
+        ValueError : if ``data`` has fewer than two columns.
+        """
+        n_columns = len(data.columns)
+        if n_columns < 2:
+            raise ValueError("Regression clustering needs at least one input and one output column.")
+
+        # locate the output column; str() keeps non-string labels from failing
+        output_idx = 1
+        for idx, column in enumerate(data.columns):
+            if '(out)' in str(column):
+                output_idx = idx
+        input_idx = [idx for idx in range(n_columns) if idx != output_idx]
+
+        y = data.iloc[:, output_idx].to_numpy()
+        X = data.iloc[:, input_idx].to_numpy()
+        rows = data.to_numpy()
+
+        # random initial assignment of every point to one cluster
+        labels = np.random.randint(0, number_of_clusters, size=len(y))
+        models, TRS = self._fit_cluster_models(X, y, labels, number_of_clusters)
+
+        # Copies are stored so that later re-assignments cannot alter the
+        # remembered best solution.
+        best_TRS = TRS
+        best_labels = labels.copy()
+        patience_count = 0
+
+        for _ in range(maxIterations):
+            if time.time() >= max_runtime:
+                # no point in spinning through the remaining iterations
+                break
+            labels = self._assign_to_best_model(X, y, models)
+            models, TRS = self._fit_cluster_models(X, y, labels, number_of_clusters)
+            if TRS > best_TRS:
+                best_TRS = TRS
+                best_labels = labels.copy()
+                continue
+            patience_count += 1
+            if patience_count >= patience:
+                print("TRS:", best_TRS)
+                df_clusters = self._frames_from_labels(
+                    rows, best_labels, number_of_clusters, columns)
+                return limit, df_clusters, best_TRS
+
+        print("No solution found in maximum number of Iterations or time limit reached.")
+        return True, [], best_TRS
--- a/automog/code/modeling/data_preprocessing.py
+++ b/automog/code/modeling/data_preprocessing.py
--- a/automog/code/modeling/linearization.py
+++ b/automog/code/modeling/linearization.py
--- a/automog/code/modeling/pwlf_miqp/__init__.py
+++ b/automog/code/modeling/pwlf_miqp/__init__.py
+from .interface import MIQP_model
--- a/automog/code/modeling/pwlf_miqp/data_processing.py
+++ b/automog/code/modeling/pwlf_miqp/data_processing.py
--- a/automog/code/modeling/pwlf_miqp/interface.py
+++ b/automog/code/modeling/pwlf_miqp/interface.py
--- a/automog/code/modeling/pwlf_miqp/kong_and_maravelias.py
+++ b/automog/code/modeling/pwlf_miqp/kong_and_maravelias.py
--- a/automog/code/modeling/pwlf_miqp/rebennack_and_kasko.py
+++ b/automog/code/modeling/pwlf_miqp/rebennack_and_kasko.py
--- a/automog/code/modeling/read_model_pickle.py
+++ b/automog/code/modeling/read_model_pickle.py
+import os
+import pickle
+if __name__ == "__main__":
+    # Load the example optimization model shipped with the case-study data.
+    # The path is relative to the current working directory.
+    data_path = '../../data/'
+    # The data directory of this repository is spelled 'case4_GuD'; the former
+    # 'case4_GUD' only resolves on case-insensitive file systems.
+    model_path = 'Goderbauer2016 + case4_GuD/optimization/models/example_model.pkl'
+    # NOTE: pickle.load can execute arbitrary code - only open trusted files.
+    with open(os.path.join(data_path, model_path), 'rb') as f:
+        model = pickle.load(f)
\ No newline at end of file
--- a/automog/code/optimization/engine/__init__.py
+++ b/automog/code/optimization/engine/__init__.py
+from .interface import OperationalOptimizationInterface
--- a/automog/code/optimization/engine/data_processing.py
+++ b/automog/code/optimization/engine/data_processing.py
--- a/automog/code/optimization/engine/interface.py
+++ b/automog/code/optimization/engine/interface.py
--- a/automog/code/optimization/engine/optimization.py
+++ b/automog/code/optimization/engine/optimization.py
--- a/automog/code/optimization/engine/optimization_with_Init.py
+++ b/automog/code/optimization/engine/optimization_with_Init.py
--- a/automog/code/optimization/interface/__init__.py
+++ b/automog/code/optimization/interface/__init__.py
+from .optimizer import optimizer, model, instance, results
\ No newline at end of file
--- a/automog/code/optimization/interface/optimizer.py
+++ b/automog/code/optimization/interface/optimizer.py
--- a/automog/code/optimization/settings/config.py
+++ b/automog/code/optimization/settings/config.py
+# Optimization settings, one integer entry per option.
+# NOTE(review): presumably 0 switches the respective constraint set off - confirm.
+config = dict(
+    ramping=0,
+    startup=0,
+    updowntimes=0,
+    revision=0,
+)
\ No newline at end of file
--- a/automog/code/optimization/settings/solver_settings.py
+++ b/automog/code/optimization/settings/solver_settings.py
+# Solver options keyed by solver name.
+# NOTE(review): the gurobi and glpk values are strings while the cplex values
+# are numbers - kept exactly as they were.
+settings = dict(
+    gurobi=dict(threads='4', mipgap='0.05'),
+    cplex=dict(threads=4, mipgap=0.02, timelimit=5400),
+    glpk=dict(mipgap='0.05'),
+)
\ No newline at end of file
--- a/automog/data/Goderbauer2016 + case4_GuD/component_parameters.xlsx
+++ b/automog/data/Goderbauer2016 + case4_GuD/component_parameters.xlsx
--- a/automog/data/Goderbauer2016 + case4_GuD/operating_points.xlsx
+++ b/automog/data/Goderbauer2016 + case4_GuD/operating_points.xlsx