# This script can reconstruct phase tables with missing data from the participant logs # Prerequsites: # - The headers of the phase tables need to be valid (apart from those the files can be empty) # - the IndependentVariables.csv needs to be complete (we just assume this since it was the case for us, otherwise also restore that!) # - no underscore (_) in the Phase or multi trial variable names import os from contextlib import chdir FolderToLogs = "StudyLogs/ParticipantLogs" FolderToPhaseTables = "StudyLogs" IndependentVars = {} def LoadIndependentVars(): global IndependentVars ConvertCoding(os.path.join(FolderToPhaseTables, "IndependentVariables.csv")) with open(os.path.join(FolderToPhaseTables, "IndependentVariables.csv"), 'r') as file: header_entries = [] for line in file: if len(header_entries) == 0: header_entries = line.strip().split(",") continue entries = line.strip().split(",") data = {} for i in range(0, len(entries)): data[header_entries[i]] = entries[i] IndependentVars[entries[0]] = data #print(IndependentVars) def CreateEntryLine(header_entries, data): line = "" for entry in header_entries: if entry in data: line += data[entry] else: if not (entry == "lowerPrio" or entry == "higerPrio"): #some vars are not required (which we don't know here, so I added them for our case manually!) print("WARNING: Missing data for "+entry) if not entry == header_entries[-1]: line += "," else: line += "\n" return line def CheckForSplitCommaInSentence(entries): entries_cleaned = [] i=0 while i < len(entries): if entries[i].startswith("\"") and not entries[i].endswith("\""): cleaned_entry = "" while not entries[i].endswith("\""): cleaned_entry += entries[i] + "[Komma]" i += 1 cleaned_entry += entries[i] entries_cleaned.append(cleaned_entry) else: entries_cleaned.append(entries[i]) i += 1 return entries_cleaned def RecoverDataTable(phase_filename, multi_trial): phase_name = phase_filename.replace("Phase_","").replace(".csv","") multi_trial_var_name = "" if multi_trial: phase_name = phase_filename.replace(".csv","").split("_")[1] multi_trial_var_name = phase_filename.replace(".csv","").split("_")[2] header_entries = [] out_lines = [] #read in header of this table with open(os.path.join(FolderToPhaseTables, phase_filename), 'r') as f: header_line = f.readline() header_entries = header_line.strip().split(",") out_lines.append(header_line) #now read through all participant logs and gather relevant information with chdir(FolderToLogs): for filename in sorted(filter(os.path.isfile, os.listdir(".")), key=os.path.getmtime): with open(filename, 'r') as file: ParticipantID = filename.split("-")[1].split("_")[0] reading_relevant_condition = False trial_nr = 0 start_time = 0.0 data = {} for line in file: if "Start Condition:" in line and "Phase: "+phase_name+";" in line: reading_relevant_condition = True trial_nr = 0 #simply not used if not multi-trial var start_time = float(line.strip().replace("#","").split(":")[0]) data = {"Phase":phase_name} data.update(IndependentVars[ParticipantID]) # adds ParticipantId and IVs to dict for factor_levels in line.split("(")[1].split(")")[0].split(";"): if "Phase:" in factor_levels: continue factor, level = factor_levels[1:].split(": ") data[factor] = level if reading_relevant_condition and "EndCondition" in line: reading_relevant_condition = False if not multi_trial: #in multi_trial case we store data not at the end but when it is recorded data["Time"] = "{:.2f}".format(float(line.strip().replace("#","").split(":")[0]) - start_time) out_lines.append(CreateEntryLine(header_entries, data)) #print(line) if "Recorded" in line: var_name = line.split(" ")[2][:-1] #last part removes ":" from the end var_value = line.strip().split(" ",3)[3] if multi_trial and var_name == multi_trial_var_name and reading_relevant_condition: #this is a multi trial var we are looking for var_entries = var_value.replace("{","").replace("}","").split(",") #maybe we split at , in phrases which are escaped in ", so check that! var_entries = CheckForSplitCommaInSentence(var_entries) for i in range(0,len(var_entries)): header_index = len(header_entries)-len(var_entries)+i data[header_entries[header_index]] = var_entries[i] data["Trial"] = str(trial_nr) trial_nr += 1 out_lines.append(CreateEntryLine(header_entries, data)) if (not multi_trial) and var_name in header_entries: data[var_name] = var_value #now write this if not os.path.exists(os.path.join(FolderToPhaseTables,"Recovered")): os.mkdir(os.path.join(FolderToPhaseTables,"Recovered")) with open(os.path.join(FolderToPhaseTables,"Recovered",phase_filename), 'w') as f: f.writelines(out_lines) def ConvertCoding(full_filename): #remove all the byte order marks that Unreal puts in there with open(full_filename, mode='r', encoding='utf-8-sig') as file: lines = file.readlines() modified_lines = [line.lstrip('\ufeff') for line in lines] with open(full_filename, mode='w', encoding='utf-8') as file: file.writelines(modified_lines) def Main(): LoadIndependentVars() #go through all files in the phasetable folder for filename in os.listdir(FolderToPhaseTables): if filename.startswith("Phase") and filename.endswith(".csv"): #if not filename == "Phase_Decision_singlePlayDurationLeft.csv": # continue full_name = os.path.join(FolderToPhaseTables, filename) ConvertCoding(full_name) print(full_name) #check whether this is a multiple trial data table or a normal phase with open(full_name, 'r') as f: header = f.readline() if ",Trial," in header: #we expect this to be a multiple trial data table RecoverDataTable(filename, True) else: RecoverDataTable(filename, False) Main()