diff --git a/Code ohne Messdaten/Statistische_Auswertung.py b/Code ohne Messdaten/Statistische_Auswertung.py new file mode 100644 index 0000000000000000000000000000000000000000..5d57c2e484799bb6d6bc1143cd5acd5a5ac7b9e1 --- /dev/null +++ b/Code ohne Messdaten/Statistische_Auswertung.py @@ -0,0 +1,128 @@ +""" +Created on Tue Feb 28 12:15:32 2023 +@author: Alberto d'Aquino Hilt +""" + +# Library for the median +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from matplotlib import colors as mcolors +from sklearn.linear_model import LinearRegression +import statsmodels.api as sm +from bioinfokit.analys import stat + +path = r'C:\Users\alber\Desktop\Messungen\Dynamische_Messung\Messdaten' + +df = pd.read_csv(path + '\Maximale Werte und Quotienten jedes Materials (kg).csv', index_col = [0]) + +# plot linear regression and ANOVA +df_anova = pd.DataFrame(columns = ['quotient','material']) +mad_list = [] +df_mean = pd.DataFrame() +df_quot = pd.DataFrame() +df_std = pd.DataFrame() + +# create linear regression of every material with p-value under 2 +def linregression(x, y): + verl2 = y # max stepvalues of treadmill (N) + xaxistoplot = x # max stepvalues of arduino (N) + if len(verl2) > 0: + xi = np.reshape(xaxistoplot, (-1, 1)) + yi = np.reshape(verl2, (-1, 1)) + reg = LinearRegression() + reg.fit(xi, yi) + predictions = reg.predict(xi) + + X2 = sm.add_constant(xaxistoplot) + est = sm.OLS(verl2, X2) + est2 = est.fit() + # print(est2.summary()) + global p + p = est2.pvalues[1] + global std_base + std_base = est2.bse[0] + global r2 + r2 = est2.rsquared + mad = np.median(np.abs(y-predictions)) + return p, mad,predictions +colours = list(mcolors.TABLEAU_COLORS.keys()) + +plt.figure(figsize = (10, 7)) +for i in range(0, 8): + + # define x and y values for linear regression + x = list(df['reg_' + str(i)].dropna()*9.81) + y = list(df['tread_' + str(i)].dropna()*9.81) + + # plot linear regression + plt.scatter(x, y, s = 8, label = 'Material ' + str(i)) + + pvalue, mad, predictions = linregression(x, y) + + # if the p-value is significant with bonferroni correction + if pvalue < (0.05/8): + plt.plot(x, predictions, color = colours[i]) + + # print the numer of Material, MAD and p-value + print('Material: ' + str(i), 'MAD: ', mad ,'P-Wert: ', pvalue) + +plt.title('Lineare Regression', fontdict = {'size':16}) +plt.xlabel('Maximale VBRK der Einlagesohle [N]', fontdict = {'size':12}) +plt.ylabel('Maximale VBRK des Laufbandes [N]', fontdict = {'size':12}) +plt.xlim(100, 450) +plt.ylim(1200, 1800) +plt.legend(loc = 2) +plt.grid(True) +plt.savefig(path + '\Lineare_Regression.pdf') +plt.show() + + +# plot the quotients +plt.figure(figsize = (10, 7)) +for i in range(0, 8): + + # save the quotient in list and append to a dataframe for boxplot + z = list(df['quot_' + str(i)].dropna()) + df_z = pd.DataFrame(z) + df_quot = pd.concat([df_quot, df_z], axis = 1) + + fg = pd.DataFrame(data = {'quotient': z, 'material': [str(i) for ii in range(len(z))]}) + df_anova = pd.concat([df_anova, fg], ignore_index = True) + + # save values of mad in dataframe for plot + mad_list.append(mad) + + # calculate standard deviation of quotient of every material + df_std = pd.concat([df_std, np.std(df_z)]) + df_std = df_std.reset_index(drop = True) + +box = plt.boxplot(df_quot.dropna(), widths = 0.5, patch_artist = True, + medianprops = dict(color = 'black', linewidth = 1), # change color of median + labels = ['0', '1', '2', '3', '4', '5', '6', '7']) + +# change color of boxplot +for patch, color in zip(box['boxes'], colours): + patch.set_facecolor(color) + +plt.title('Quotienten der Materialien', fontdict = {'size':16}) +plt.xlabel('Material', fontdict = {'size':12}) +plt.ylabel('Quotient', fontdict = {'size':12}) +plt.xlim(0, 9) +plt.ylim(0.07, 0.32) +plt.grid(True) +plt.savefig(path + '\Quotienten.pdf') +plt.show() + +# print results of ANOVA and post-hoc test +print('#########################################################################') +print('ANOVA:') +res = stat() +res.anova_stat(df = df_anova, res_var = 'quotient', anova_model = 'quotient ~ C(material)') +print(res.anova_summary) +print('#########################################################################') +print('Post-hoc:') +res = stat() +res.tukey_hsd(df = df_anova, res_var = 'quotient', anova_model = 'quotient ~ C(material)',xfac_var='material') +print(res.tukey_summary) +