Commit 4f08b845 authored by Alex Wiens

prule.summary: Add more output options

parent 7a0bdba9
@@ -7,6 +7,7 @@ import datetime
 import tempfile
 import shutil
 import re
+import copy
 import sqlite3
@@ -15,9 +16,6 @@ import sqlite3
 import prule.db
 import prule.debug
-#TODO: sorting
-#TODO: make grouping optional
-#TODO: resource hours
 helptext="""Usage:
 The program reads entries from the prule database file and creates summaries.
@@ -228,37 +226,82 @@ def parse_timeduration(input):
         return datetime.timedelta(days=int(days), hours=int(hours), minutes=int(minutes), seconds=int(seconds))
     raise Exception("Timeduration '{:}' could not be parsed.".format(input))
-def genTable(table, header=None, align=None, margin=1, header_line="="):
-    #columns = len(table[0])
+def genTable(rows, header=None, subcol=None, align=None, margin=1, header_line="="):
+    #columns = len(rows[0])
     columns = 0
-    for l in table:
+    for l in rows:
         columns = max(columns, len(l))
     cmax = [0]*columns
-    for row in table:
+    for rix,row in enumerate(rows):
+        if header != None and rix < header:
+            continue
         for cix,col in enumerate(row):
             cmax[cix] = max(cmax[cix], len(str(col)))
+    cmax_header = [0]*len(rows[0]) # in case headers exist
+    if header > 0:
+        cgroups = []
+        chix = 0
+        for cix, col in enumerate(cmax):
+            if len(cgroups) == chix:
+                cgroups.append([])
+            cmax_header[chix] += cmax[cix]
+            cgroups[-1].append(cix)
+            if cix < len(cmax)-1 and cix in subcol:
+                cmax_header[chix] += margin
+            if cix not in subcol:
+                chix += 1
+        for cix,cg in enumerate(cgroups):
+            cmax_head = max([ len(r[cix]) for r in rows[0:header] ])
+            cmax_group = cmax_header[cix]
+            if cmax_head > cmax_group:
+                cmax[cg[0]] += cmax_head-cmax_group
+                cmax_header[cix] = cmax_head
     out = []
-    for rix,row in enumerate(table):
+    norm_margin = " "*margin
+    subc_margin = " "*margin
+    if margin > 0:
+        subc_margin = subc_margin[:int(len(subc_margin)/2.0)] + "/" + subc_margin[int(len(subc_margin)/2.0)+1:]
+    # header
+    for rix,row in enumerate(rows):
+        if header == None or rix >= header:
+            break
         l = ""
         for cix,col in enumerate(row):
+            mar = norm_margin
+            colsize = cmax_header[cix]
             if align==None or align=="left":
-                l += str(col) + " "*(cmax[cix]-len(str(col)))
+                l += str(col) + " "*(colsize-len(str(col)))
             elif align=="right":
-                l += " "*(cmax[cix]-len(str(col))) + str(col)
+                l += " "*(colsize-len(str(col))) + str(col)
             if cix < len(row)-1:
-                l += " "*margin
+                l += mar
         out += [l]
-        if header != None and rix == header-1:
-            l = ""
-            for cix, col in enumerate(cmax):
-                l += header_line*col
-                if cix < len(row)-1:
-                    l += " "*margin
-            out += [l]
+    if header != None:
+        l = ""
+        for cix, col in enumerate(cmax_header):
+            l += header_line*col
+            if cix < len(cmax_header)-1:
+                l += " "*margin
+        out += [l]
+    # normal row
+    for rix,row in enumerate(rows):
+        if header != None and rix < header:
+            continue
+        l = ""
+        for cix,col in enumerate(row):
+            mar = norm_margin
+            if subcol != None and cix in subcol:
+                mar = subc_margin
+            if align==None or align=="left":
+                l += str(col) + " "*(cmax[cix]-len(str(col)))
+            elif align=="right":
+                l += " "*(cmax[cix]-len(str(col))) + str(col)
+            if cix < len(row)-1:
+                l += mar
+        out += [l]
     return out
 #def analyse_user(user_name, jobs):
 def info_print(db_con, args):
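
For reference, the reworked genTable groups consecutive data columns under a single header cell and joins a sub-column to its right-hand neighbour with a "/" instead of the normal margin. A minimal sketch of a call (the rows, header text and subcol choice below are invented for illustration):

# Hypothetical input: one header row whose cells span the column groups,
# followed by data rows; column 1 is declared a sub-column of column 2.
rows = [
    ["account/user", "total (jobs/cpuh)"],
    ["alice", 10, "12.50"],
    ["bob", 3, "4.00"],
]
for line in genTable(rows, header=1, subcol={1}, align="right"):
    print(line)
# account/user total (jobs/cpuh)
# ============ =================
#        alice          10/12.50
#          bob           3/ 4.00
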
@@ -468,21 +511,6 @@ def summary_cluster(db_con, cluster, users_list, accounts_list, args):
     if accounts_list != None:
         cond.append("project IN ({:})".format(",".join(["\"{:}\"".format(a) for a in accounts_list])))
-    results_it = db_con.db_get_all_results(conditions=cond, iterator=True)
-    results = {} # account -> user -> job
-    for j in results_it:
-        account = j['project']
-        user = j['user']
-        #filters should work with SQL conditions
-        #if accounts_list != None and account not in accounts_list:
-        #    continue
-        #if users_list != None and user not in users_list:
-        #    continue
-        if account not in results:
-            results[account] = {}
-        if user not in results[account]:
-            results[account][user] = []
-        results[account][user].append(j)
     do_overlap = False
     if 'summary_overlap' in args:
@@ -495,29 +523,24 @@ def summary_cluster(db_con, cluster, users_list, accounts_list, args):
         rule_names[rules_dict[n]-1] = n.replace(' ','_')
     rule_names_match = ["rule_{:}_match".format(rule_i) for rule_i in range(1, rules_len+1)]
-    c_total_jobs = 0
-    c_total_cpuh = 0.0
-    # columns: account/user, total (jobs/cpuh), matched (jobs/cpuh), rule(number/cpuh)
-    accounts = sorted(results.keys())
-    account_rows = []
-    for a in accounts:
-        users = sorted(results[a].keys())
-        a_total_jobs = 0
-        a_matched_jobs = 0
-        a_total_cpuh = 0
-        a_matched_cpuh = 0
-        user_rows = []
-        for u in users:
-            u_total_jobs = 0
-            u_matched_jobs = 0
-            u_total_cpuh = 0
-            u_matched_cpuh = 0
-            rule_total = [(0.0,0.0)]*rules_len
-            for j in results[a][u]:
-                sec = j['duration']
-                if do_overlap == True: # only consider overlap
-                    j_start = j['start']
-                    j_stop = j['stop']
+    sort_column = args["sort"] if "sort" in args else None
+    sort_reverse = False if "sort_reverse" in args else True
+    def add_vec(a,b):
+        if len(a) == 0:
+            return copy.copy(b)
+        c = []
+        for ix in range(len(a)):
+            if type(a[ix]) == tuple:
+                c.append(tuple( [ a[ix][jx] + b[ix][jx] for jx in range(len(a[ix])) ] ))
+            else:
+                c.append(a[ix] + b[ix])
+        return c
+    def job_time(job, start, stop, overlap):
+        sec = job['duration']
+        if overlap == True:
+            j_start = job['start']
+            j_stop = job['stop']
             if j_start < stop_ts and j_stop > start_ts:
                 if j_start < start_ts or j_stop > stop_ts:
                     o_start = max(j_start, start_ts)
@@ -528,40 +551,133 @@ def summary_cluster(db_con, cluster, users_list, accounts_list, args):
                     pass
             else:
                 sec = 0.0
-                hwt = j['num_hwthreads']
-                cpuh = hwt * (sec/3600.0)
-                matches = 0
-                for rix,r in enumerate(rule_names_match):
-                    match = j[r]
-                    if match == 1:
-                        matches += 1
-                        rcount, rcpuh = rule_total[rix]
-                        rule_total[rix] = (rcount+1, rcpuh+cpuh)
-                if matches > 0:
-                    u_matched_jobs += 1
-                    u_matched_cpuh += cpuh
-                u_total_jobs += 1
-                u_total_cpuh += cpuh
-            a_total_jobs += u_total_jobs
-            a_matched_jobs += u_matched_jobs
-            a_total_cpuh += u_total_cpuh
-            a_matched_cpuh += u_matched_cpuh
-            rule_columns = [ "{:}/{:.2f}".format(rjobs,rcoreh) for rjobs,rcoreh in rule_total]
-            user_rows.append([u, "{:}/{:.2f}".format(u_total_jobs, u_total_cpuh), "{:}/{:.2f}".format(u_matched_jobs, u_matched_cpuh)] + rule_columns)
-        account_rows.append([a, "{:}/{:.2f}".format(a_total_jobs, a_total_cpuh), "{:}/{:.2f}".format(a_matched_jobs, a_matched_cpuh)])
-        account_rows += user_rows
-        c_total_jobs += a_total_jobs
-        c_total_cpuh += a_total_cpuh
-    # print header
-    header = ["account/user", "total (jobs/cpuh)", "matched (jobs/cpuh)"] + rule_names
-    # print rows
-    cluster_times = "Earliest: {:} {:} Latest: {:} {:}".format(
-        datetime.datetime.fromtimestamp(job_started_earliest['start']),
-        datetime.datetime.fromtimestamp(job_started_earliest['stop']),
-        datetime.datetime.fromtimestamp(job_finished_latest['start']),
-        datetime.datetime.fromtimestamp(job_finished_latest['stop']))
-    print("Summary:", cluster, start, stop, "Total jobs: {:}".format(c_total_jobs), "Total cpuh: {:.2f}".format(c_total_cpuh), cluster_times)
-    out = genTable([header]+account_rows, header=1)
+        return sec
+    def job_matched(job, c_time, r_time):
+        rule_vec = []
+        matches = 0
+        for rix,r in enumerate(rule_names_match):
+            match = job[r]
+            if match == 1:
+                matches += 1
+                rule_vec.append((1, c_time, r_time))
+            else:
+                rule_vec.append((0, 0.0, 0.0))
+        return matches, rule_vec
+    def job_vec(job, start, stop, overlap):
+        # total_jobs, total_cpuh, total_resh, matched_jobs, matched_cpuh, matched_resh
+        j_time = job_time(job, start, stop, overlap)
+        j_time = j_time/3600.0 # cpu seconds to cpu hours
+        c_time = job["num_hwthreads"] * j_time
+        r_time = job["num_acc"] * j_time
+        matches, rule_vec = job_matched(job, c_time, r_time)
+        matched = 1 if matches > 0 else 0
+        v = [1, c_time, r_time, matched, matched * c_time, matched * r_time] + rule_vec
+        return v
+    class Group:
+        def __init__(self, name):
+            self.name = name
+            self.subgroups = {}
+            self.jobs = []
+            self.vec = []
+        def add(self, job, vec):
+            raise Exception("Group.add not implemented")
+        def print_vec(self):
+            r = [self.name]
+            for c in self.vec:
+                if type(c) == float:
+                    r.append("{:.2f}".format(c))
+                elif type(c) == tuple:
+                    allzero = True
+                    t = []
+                    for j in c:
+                        if type(j) == float:
+                            allzero = allzero and j == 0.0
+                            t.append("{:.2f}".format(j))
+                        elif type(j) == int:
+                            allzero = allzero and j == 0
+                            t.append(str(j))
+                        else:
+                            allzero = False
+                            t.append(str(j))
+                    if allzero == True:
+                        r += [""]*len(t)
+                    else:
+                        r += t
+                elif type(c) == int:
+                    r.append(str(c))
+                else:
+                    r.append(str(c))
+            return r
+        def print_rows(self, sortcol=None, sortrev=True):
+            r = [self.print_vec()]
+            subg = self.subgroups.values()
+            if sortcol != None:
+                subg = sorted(subg, reverse=sortrev, key=lambda s: ([s.name]+s.vec)[sortcol])
+            for g in subg:
+                r += g.print_rows(sortcol=sortcol, sortrev=sortrev)
+            return r
+    class ClusterGroupAcc(Group):
+        def add(self, job, vec):
+            self.vec = add_vec(self.vec, vec)
+            self.jobs.append(job)
+            account = job['project']
+            if account not in self.subgroups:
+                self.subgroups[account] = AccountGroup(account)
+            self.subgroups[account].add(job, vec)
+    class ClusterGroupUse(Group):
+        def add(self, job, vec):
+            self.vec = add_vec(self.vec, vec)
+            self.jobs.append(job)
+            user = job['user']
+            if user not in self.subgroups:
+                self.subgroups[user] = UserGroup(user)
+            self.subgroups[user].add(job, vec)
+    class AccountGroup(Group):
+        def add(self, job, vec):
+            self.vec = add_vec(self.vec, vec)
+            self.jobs.append(job)
+            user = job['user']
+            if user not in self.subgroups:
+                self.subgroups[user] = UserGroup(user)
+            self.subgroups[user].add(job, vec)
+    class UserGroup(Group):
+        def add(self, job, vec):
+            self.vec = add_vec(self.vec, vec)
+            self.jobs.append(job)
+    grouping = "group_acc" in args
+    if grouping == True:
+        cgroup = ClusterGroupAcc(cluster)
+    else:
+        cgroup = ClusterGroupUse(cluster)
+    results_it = db_con.db_get_all_results(conditions=cond, iterator=True)
+    results = {} # account -> user -> job
+    for j in results_it:
+        account = j['project']
+        user = j['user']
+        #filters should work with SQL conditions
+        #if accounts_list != None and account not in accounts_list:
+        #    continue
+        #if users_list != None and user not in users_list:
+        #    continue
+        #if account not in results:
+        #    results[account] = {}
+        #if user not in results[account]:
+        #    results[account][user] = []
+        #results[account][user].append(j)
+        j_vec = job_vec(j, start, stop, do_overlap)
+        cgroup.add(j, j_vec)
+    r = cgroup.print_rows(sortcol=sort_column, sortrev=sort_reverse)
+    header = ["account/user", "total (jobs/cpuh/resh)", "matched (jobs/cpuh/resh)"] + rule_names
+    align = ["left"] + (["right"]*(6 + (3*len(rule_names))))
+    subcol_1 = set([1,2,4,5])
+    subcol_2 = set(range(7, 7+(3*len(rule_names))))
+    subcol_3 = set(range(7+2, 7+(3*len(rule_names))+2, 3))
+    subcol = subcol_1.union( subcol_2.difference(subcol_3) )
+    out = genTable([header]+r, header=1, subcol=subcol, align="right")
+    #out = genTable([header]+account_rows, header=1)
     for l in out:
         print(l)
@@ -618,6 +734,12 @@ if __name__ == "__main__":
         help="Count affected job numbers as job count or as cpu hours.")
     summary_group.add_argument('--summary-overlap', action='store_true',
         help="Only consider cpu hours that overlap with timeframe.")
+    summary_group.add_argument('--sort', type=int, metavar='COLUMN',
+        help="Sort by column index (starting with 0).")
+    summary_group.add_argument('--sort-reverse', action='store_true',
+        help="Sort ascending instead of descending.")
+    summary_group.add_argument('--group-acc', action='store_true',
+        help="Group by account.")
     # svg_group = parser.add_argument_group('SVG parameters',
     #     'Configure SVG output.')
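
The new --sort, --sort-reverse and --group-acc options feed sort_column, sort_reverse and the ClusterGroupAcc/ClusterGroupUse choice above. The sort key in print_rows indexes the list [name] + vec: 0 is the group name, 1-6 are total jobs/cpuh/resh and matched jobs/cpuh/resh, and from index 7 on each rule contributes one (matched, cpuh, resh) tuple. A rough sketch of that key, with invented stand-ins for Group.name and Group.vec:

# Invented per-user aggregates in the same layout as Group.vec.
users = [
    ("alice", [10, 120.0, 0.0, 4, 60.0, 0.0]),
    ("bob",   [ 3,  40.0, 0.0, 1, 10.0, 0.0]),
]
sortcol = 4  # e.g. --sort 4: order by the number of matched jobs
for name, vec in sorted(users, reverse=True, key=lambda s: ([s[0]] + s[1])[sortcol]):
    print(name, vec[sortcol-1])
# alice 4
# bob 1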