# Source code for snakemake.stats

__author__ = "Johannes Köster"
__copyright__ = "Copyright 2021, Johannes Köster"
__email__ = "johannes.koester@uni-due.de"
__license__ = "MIT"

import time
import csv
import json
from collections import defaultdict

import snakemake.jobs

# Converts an epoch timestamp (seconds) into a human-readable string; applied
# to the start/stop times yielded by Stats.file_stats.
fmt_time = time.ctime


class Stats:
    """Record wall-clock start/end times of jobs and summarize them.

    Timestamps are taken with ``time.time()`` at the moment a job is reported
    as started or finished. Summaries only consider jobs for which both a
    start and an end time have been recorded.
    """

    def __init__(self):
        # Both dicts map a job object to an epoch timestamp in seconds.
        self.starttime = dict()
        self.endtime = dict()

    def report_job_start(self, job):
        """Store the current time as the start time of ``job``.

        If ``job.is_group()`` is true, ``job`` is iterated and each contained
        job gets its own start timestamp.
        """
        if job.is_group():
            for j in job:
                self.starttime[j] = time.time()
        else:
            self.starttime[job] = time.time()

    def report_job_end(self, job):
        """Store the current time as the end time of ``job``.

        If ``job.is_group()`` is true, ``job`` is iterated and each contained
        job gets its own end timestamp.
        """
        if job.is_group():
            for j in job:
                self.endtime[j] = time.time()
        else:
            self.endtime[job] = time.time()

    def _finished_jobs(self):
        """Yield ``(job, start, stop)`` for jobs having both timestamps.

        Jobs that were started but never reported as ended are skipped, so
        the summaries below do not fail with ``KeyError`` on them.
        """
        for job, start in self.starttime.items():
            stop = self.endtime.get(job)
            if stop is not None:
                yield job, start, stop

    @property
    def rule_stats(self):
        """Yield ``(rule, mean, min, max)`` runtimes in seconds per rule.

        Runtimes are grouped by ``job.rule``; a rule only appears if at least
        one of its jobs has finished.
        """
        runtimes_by_rule = defaultdict(list)
        for job, start, stop in self._finished_jobs():
            runtimes_by_rule[job.rule].append(stop - start)
        for rule, runtimes in runtimes_by_rule.items():
            yield (
                rule,
                sum(runtimes) / len(runtimes),
                min(runtimes),
                max(runtimes),
            )

    @property
    def file_stats(self):
        """Yield per-output-file timing of finished jobs.

        For every file in ``job.expanded_output`` a tuple
        ``(file, formatted start, formatted stop, duration in seconds, job)``
        is produced, with the times formatted by ``fmt_time``.
        """
        for job, start, stop in self._finished_jobs():
            for f in job.expanded_output:
                yield f, fmt_time(start), fmt_time(stop), stop - start, job

    @property
    def overall_runtime(self):
        """Return seconds between the earliest start and the latest end.

        Returns 0 if no start or no end time has been recorded yet.
        """
        if self.starttime and self.endtime:
            return max(self.endtime.values()) - min(self.starttime.values())
        else:
            return 0

    def to_json(self, path):
        """Write the collected statistics as indented JSON to ``path``.

        The document has the keys ``"total_runtime"``, ``"rules"`` (keyed by
        ``rule.name``) and ``"files"`` (keyed by output file).
        """
        rule_stats = {
            rule.name: {
                "mean-runtime": mean_runtime,
                "min-runtime": min_runtime,
                "max-runtime": max_runtime,
            }
            for rule, mean_runtime, min_runtime, max_runtime in self.rule_stats
        }
        file_stats = {
            f: {
                "start-time": start,
                "stop-time": stop,
                "duration": duration,
                # The HIGHEST_PRIORITY constant is written as the string
                # "highest" instead of its raw value.
                "priority": job.priority
                if job.priority != snakemake.jobs.Job.HIGHEST_PRIORITY
                else "highest",
                "resources": dict(job.resources.items()),
            }
            for f, start, stop, duration, job in self.file_stats
        }

        with open(path, "w") as f:
            json.dump(
                {
                    "total_runtime": self.overall_runtime,
                    "rules": rule_stats,
                    "files": file_stats,
                },
                f,
                indent=4,
            )