Commit 4f08b845 authored by Alex Wiens

prule.summary: Add more output options

parent 7a0bdba9
@@ -7,6 +7,7 @@ import datetime
 import tempfile
 import shutil
 import re
+import copy
 import sqlite3
@@ -15,9 +16,6 @@ import sqlite3
 import prule.db
 import prule.debug
-#TODO: sorting
-#TODO: make grouping optional
-#TODO: resource hours
 helptext="""Usage:
 The program reads entries from the prule database file and creates summaries.
@@ -228,37 +226,82 @@ def parse_timeduration(input):
         return datetime.timedelta(days=int(days), hours=int(hours), minutes=int(minutes), seconds=int(seconds))
     raise Exception("Timeduration '{:}' could not be parsed.".format(input))
-def genTable(table, header=None, align=None, margin=1, header_line="="):
-    #columns = len(table[0])
+def genTable(rows, header=None, subcol=None, align=None, margin=1, header_line="="):
+    #columns = len(rows[0])
     columns = 0
-    for l in table:
+    for l in rows:
         columns = max(columns, len(l))
     cmax = [0]*columns
-    for row in table:
+    for rix,row in enumerate(rows):
+        if header != None and rix < header:
+            continue
         for cix,col in enumerate(row):
             cmax[cix] = max(cmax[cix], len(str(col)))
+    cmax_header = [0]*len(rows[0]) # in case headers exist
+    if header > 0:
+        cgroups = []
+        chix = 0
+        for cix, col in enumerate(cmax):
+            if len(cgroups) == chix:
+                cgroups.append([])
+            cmax_header[chix] += cmax[cix]
+            cgroups[-1].append(cix)
+            if cix < len(cmax)-1 and cix in subcol:
+                cmax_header[chix] += margin
+            if cix not in subcol:
+                chix += 1
+        for cix,cg in enumerate(cgroups):
+            cmax_head = max([ len(r[cix]) for r in rows[0:header] ])
+            cmax_group = cmax_header[cix]
+            if cmax_head > cmax_group:
+                cmax[cg[0]] += cmax_head-cmax_group
+                cmax_header[cix] = cmax_head
     out = []
-    for rix,row in enumerate(table):
+    norm_margin = " "*margin
+    subc_margin = " "*margin
+    if margin > 0:
+        subc_margin = subc_margin[:int(len(subc_margin)/2.0)] + "/" + subc_margin[int(len(subc_margin)/2.0)+1:]
+    # header
+    for rix,row in enumerate(rows):
+        if header == None or rix >= header:
+            break
         l = ""
         for cix,col in enumerate(row):
+            mar = norm_margin
+            colsize = cmax_header[cix]
             if align==None or align=="left":
-                l += str(col) + " "*(cmax[cix]-len(str(col)))
+                l += str(col) + " "*(colsize-len(str(col)))
             elif align=="right":
-                l += " "*(cmax[cix]-len(str(col))) + str(col)
+                l += " "*(colsize-len(str(col))) + str(col)
             if cix < len(row)-1:
-                l += " "*margin
+                l += mar
         out += [l]
-        if header != None and rix == header-1:
-            l = ""
-            for cix, col in enumerate(cmax):
-                l += header_line*col
-                if cix < len(row)-1:
-                    l += " "*margin
-            out += [l]
+    if header != None:
+        l = ""
+        for cix, col in enumerate(cmax_header):
+            l += header_line*col
+            if cix < len(cmax_header)-1:
+                l += " "*margin
+        out += [l]
+    # normal row
+    for rix,row in enumerate(rows):
+        if header != None and rix < header:
+            continue
+        l = ""
+        for cix,col in enumerate(row):
+            mar = norm_margin
+            if subcol != None and cix in subcol:
+                mar = subc_margin
+            if align==None or align=="left":
+                l += str(col) + " "*(cmax[cix]-len(str(col)))
+            elif align=="right":
+                l += " "*(cmax[cix]-len(str(col))) + str(col)
+            if cix < len(row)-1:
+                l += mar
+        out += [l]
     return out
 #def analyse_user(user_name, jobs):
 def info_print(db_con, args):
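
For reference, the reworked genTable groups consecutive data columns under a single header cell and joins a sub-column to its right-hand neighbour with a "/" instead of the normal margin. A minimal sketch of a call (the rows, header text and subcol choice below are invented for illustration):

# Hypothetical input: one header row whose cells span the column groups,
# followed by data rows; column 1 is declared a sub-column of column 2.
rows = [
    ["account/user", "total (jobs/cpuh)"],
    ["alice", 10, "12.50"],
    ["bob", 3, "4.00"],
]
for line in genTable(rows, header=1, subcol={1}, align="right"):
    print(line)
# account/user total (jobs/cpuh)
# ============ =================
#        alice          10/12.50
#          bob           3/ 4.00
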
@@ -468,21 +511,6 @@ def summary_cluster(db_con, cluster, users_list, accounts_list, args):
     if accounts_list != None:
         cond.append("project IN ({:})".format(",".join(["\"{:}\"".format(a) for a in accounts_list])))
-    results_it = db_con.db_get_all_results(conditions=cond, iterator=True)
-    results = {} # account -> user -> job
-    for j in results_it:
-        account = j['project']
-        user = j['user']
-        #filters should work with SQL conditions
-        #if accounts_list != None and account not in accounts_list:
-        #    continue
-        #if users_list != None and user not in users_list:
-        #    continue
-        if account not in results:
-            results[account] = {}
-        if user not in results[account]:
-            results[account][user] = []
-        results[account][user].append(j)
     do_overlap = False
     if 'summary_overlap' in args:
@@ -495,29 +523,24 @@ def summary_cluster(db_con, cluster, users_list, accounts_list, args):
         rule_names[rules_dict[n]-1] = n.replace(' ','_')
     rule_names_match = ["rule_{:}_match".format(rule_i) for rule_i in range(1, rules_len+1)]
-    c_total_jobs = 0
-    c_total_cpuh = 0.0
-    # columns: account/user, total (jobs/cpuh), matched (jobs/cpuh), rule(number/cpuh)
-    accounts = sorted(results.keys())
-    account_rows = []
-    for a in accounts:
-        users = sorted(results[a].keys())
-        a_total_jobs = 0
-        a_matched_jobs = 0
-        a_total_cpuh = 0
-        a_matched_cpuh = 0
-        user_rows = []
-        for u in users:
-            u_total_jobs = 0
-            u_matched_jobs = 0
-            u_total_cpuh = 0
-            u_matched_cpuh = 0
-            rule_total = [(0.0,0.0)]*rules_len
-            for j in results[a][u]:
-                sec = j['duration']
-                if do_overlap == True: # only consider overlap
-                    j_start = j['start']
-                    j_stop = j['stop']
+    sort_column = args["sort"] if "sort" in args else None
+    sort_reverse = False if "sort_reverse" in args else True
+    def add_vec(a,b):
+        if len(a) == 0:
+            return copy.copy(b)
+        c = []
+        for ix in range(len(a)):
+            if type(a[ix]) == tuple:
+                c.append(tuple( [ a[ix][jx] + b[ix][jx] for jx in range(len(a[ix])) ] ))
+            else:
+                c.append(a[ix] + b[ix])
+        return c
+    def job_time(job, start, stop, overlap):
+        sec = job['duration']
+        if overlap == True:
+            j_start = job['start']
+            j_stop = job['stop']
             if j_start < stop_ts and j_stop > start_ts:
                 if j_start < start_ts or j_stop > stop_ts:
                     o_start = max(j_start, start_ts)
@@ -528,40 +551,133 @@ def summary_cluster(db_con, cluster, users_list, accounts_list, args):
                     pass
             else:
                 sec = 0.0
-                hwt = j['num_hwthreads']
-                cpuh = hwt * (sec/3600.0)
-                matches = 0
-                for rix,r in enumerate(rule_names_match):
-                    match = j[r]
-                    if match == 1:
-                        matches += 1
-                        rcount, rcpuh = rule_total[rix]
-                        rule_total[rix] = (rcount+1, rcpuh+cpuh)
-                if matches > 0:
-                    u_matched_jobs += 1
-                    u_matched_cpuh += cpuh
-                u_total_jobs += 1
-                u_total_cpuh += cpuh
-            a_total_jobs += u_total_jobs
-            a_matched_jobs += u_matched_jobs
-            a_total_cpuh += u_total_cpuh
-            a_matched_cpuh += u_matched_cpuh
-            rule_columns = [ "{:}/{:.2f}".format(rjobs,rcoreh) for rjobs,rcoreh in rule_total]
-            user_rows.append([u, "{:}/{:.2f}".format(u_total_jobs, u_total_cpuh), "{:}/{:.2f}".format(u_matched_jobs, u_matched_cpuh)] + rule_columns)
-        account_rows.append([a, "{:}/{:.2f}".format(a_total_jobs, a_total_cpuh), "{:}/{:.2f}".format(a_matched_jobs, a_matched_cpuh)])
-        account_rows += user_rows
-        c_total_jobs += a_total_jobs
-        c_total_cpuh += a_total_cpuh
-    # print header
-    header = ["account/user", "total (jobs/cpuh)", "matched (jobs/cpuh)"] + rule_names
-    # print rows
-    cluster_times = "Earliest: {:} {:} Latest: {:} {:}".format(
-        datetime.datetime.fromtimestamp(job_started_earliest['start']),
-        datetime.datetime.fromtimestamp(job_started_earliest['stop']),
-        datetime.datetime.fromtimestamp(job_finished_latest['start']),
-        datetime.datetime.fromtimestamp(job_finished_latest['stop']))
-    print("Summary:", cluster, start, stop, "Total jobs: {:}".format(c_total_jobs), "Total cpuh: {:.2f}".format(c_total_cpuh), cluster_times)
-    out = genTable([header]+account_rows, header=1)
+        return sec
+    def job_matched(job, c_time, r_time):
+        rule_vec = []
+        matches = 0
+        for rix,r in enumerate(rule_names_match):
+            match = job[r]
+            if match == 1:
+                matches += 1
+                rule_vec.append((1, c_time, r_time))
+            else:
+                rule_vec.append((0, 0.0, 0.0))
+        return matches, rule_vec
+    def job_vec(job, start, stop, overlap):
+        # total_jobs, total_cpuh, total_resh, matched_jobs, matched_cpuh, matched_resh
+        j_time = job_time(job, start, stop, overlap)
+        j_time = j_time/3600.0 # cpu seconds to cpu hours
+        c_time = job["num_hwthreads"] * j_time
+        r_time = job["num_acc"] * j_time
+        matches, rule_vec = job_matched(job, c_time, r_time)
+        matched = 1 if matches > 0 else 0
+        v = [1, c_time, r_time, matched, matched * c_time, matched * r_time] + rule_vec
+        return v
+    class Group:
+        def __init__(self, name):
+            self.name = name
+            self.subgroups = {}
+            self.jobs = []
+            self.vec = []
+        def add(self, job, vec):
+            raise Exception("Group.add not implemented")
+        def print_vec(self):
+            r = [self.name]
+            for c in self.vec:
+                if type(c) == float:
+                    r.append("{:.2f}".format(c))
+                elif type(c) == tuple:
+                    allzero = True
+                    t = []
+                    for j in c:
+                        if type(j) == float:
+                            allzero = allzero and j == 0.0
+                            t.append("{:.2f}".format(j))
+                        elif type(j) == int:
+                            allzero = allzero and j == 0
+                            t.append(str(j))
+                        else:
+                            allzero = False
+                            t.append(str(j))
+                    if allzero == True:
+                        r += [""]*len(t)
+                    else:
+                        r += t
+                elif type(c) == int:
+                    r.append(str(c))
+                else:
+                    r.append(str(c))
+            return r
+        def print_rows(self, sortcol=None, sortrev=True):
+            r = [self.print_vec()]
+            subg = self.subgroups.values()
+            if sortcol != None:
+                subg = sorted(subg, reverse=sortrev, key=lambda s: ([s.name]+s.vec)[sortcol])
+            for g in subg:
+                r += g.print_rows(sortcol=sortcol, sortrev=sortrev)
+            return r
+    class ClusterGroupAcc(Group):
+        def add(self, job, vec):
+            self.vec = add_vec(self.vec, vec)
+            self.jobs.append(job)
+            account = job['project']
+            if account not in self.subgroups:
+                self.subgroups[account] = AccountGroup(account)
+            self.subgroups[account].add(job, vec)
+    class ClusterGroupUse(Group):
+        def add(self, job, vec):
+            self.vec = add_vec(self.vec, vec)
+            self.jobs.append(job)
+            user = job['user']
+            if user not in self.subgroups:
+                self.subgroups[user] = UserGroup(user)
+            self.subgroups[user].add(job, vec)
+    class AccountGroup(Group):
+        def add(self, job, vec):
+            self.vec = add_vec(self.vec, vec)
+            self.jobs.append(job)
+            user = job['user']
+            if user not in self.subgroups:
+                self.subgroups[user] = UserGroup(user)
+            self.subgroups[user].add(job, vec)
+    class UserGroup(Group):
+        def add(self, job, vec):
+            self.vec = add_vec(self.vec, vec)
+            self.jobs.append(job)
+    grouping = "group_acc" in args
+    if grouping == True:
+        cgroup = ClusterGroupAcc(cluster)
+    else:
+        cgroup = ClusterGroupUse(cluster)
+    results_it = db_con.db_get_all_results(conditions=cond, iterator=True)
+    results = {} # account -> user -> job
+    for j in results_it:
+        account = j['project']
+        user = j['user']
+        #filters should work with SQL conditions
+        #if accounts_list != None and account not in accounts_list:
+        #    continue
+        #if users_list != None and user not in users_list:
+        #    continue
+        #if account not in results:
+        #    results[account] = {}
+        #if user not in results[account]:
+        #    results[account][user] = []
+        #results[account][user].append(j)
+        j_vec = job_vec(j, start, stop, do_overlap)
+        cgroup.add(j, j_vec)
+    r = cgroup.print_rows(sortcol=sort_column, sortrev=sort_reverse)
+    header = ["account/user", "total (jobs/cpuh/resh)", "matched (jobs/cpuh/resh)"] + rule_names
+    align = ["left"] + (["right"]*(6 + (3*len(rule_names))))
+    subcol_1 = set([1,2,4,5])
+    subcol_2 = set(range(7, 7+(3*len(rule_names))))
+    subcol_3 = set(range(7+2, 7+(3*len(rule_names))+2, 3))
+    subcol = subcol_1.union( subcol_2.difference(subcol_3) )
+    out = genTable([header]+r, header=1, subcol=subcol, align="right")
+    #out = genTable([header]+account_rows, header=1)
     for l in out:
         print(l)
@@ -618,6 +734,12 @@ if __name__ == "__main__":
         help="Count affected job numbers as job count or as cpu hours.")
     summary_group.add_argument('--summary-overlap', action='store_true',
         help="Only consider cpu hours that overlap with timeframe.")
+    summary_group.add_argument('--sort', type=int, metavar='COLUMN',
+        help="Sort by column index (starting with 0).")
+    summary_group.add_argument('--sort-reverse', action='store_true',
+        help="Sort ascending instead of descending.")
+    summary_group.add_argument('--group-acc', action='store_true',
+        help="Group by account.")
     # svg_group = parser.add_argument_group('SVG parameters',
     #     'Configure SVG output.')
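
The new --sort, --sort-reverse and --group-acc options feed sort_column, sort_reverse and the ClusterGroupAcc/ClusterGroupUse choice above. The sort key in print_rows indexes the list [name] + vec: 0 is the group name, 1-6 are total jobs/cpuh/resh and matched jobs/cpuh/resh, and from index 7 on each rule contributes one (matched, cpuh, resh) tuple. A rough sketch of that key, with invented stand-ins for Group.name and Group.vec:

# Invented per-user aggregates in the same layout as Group.vec.
users = [
    ("alice", [10, 120.0, 0.0, 4, 60.0, 0.0]),
    ("bob",   [ 3,  40.0, 0.0, 1, 10.0, 0.0]),
]
sortcol = 4  # e.g. --sort 4: order by the number of matched jobs
for name, vec in sorted(users, reverse=True, key=lambda s: ([s[0]] + s[1])[sortcol]):
    print(name, vec[sortcol-1])
# alice 4
# bob 1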