Source code for snakemake.stats

__author__ = "Johannes Köster"
__copyright__ = "Copyright 2021, Johannes Köster"
__email__ = "johannes.koester@uni-due.de"
__license__ = "MIT"

import time
import csv
import json
from collections import defaultdict

import snakemake.jobs

fmt_time = time.ctime


[docs]class Stats:
    def __init__(self):
        self.starttime = dict()
        self.endtime = dict()

[docs]    def report_job_start(self, job):
        if job.is_group():
            for j in job:
                self.starttime[j] = time.time()
        else:
            self.starttime[job] = time.time()

[docs]    def report_job_end(self, job):
        if job.is_group():
            for j in job:
                self.endtime[j] = time.time()
        else:
            self.endtime[job] = time.time()

    @property
    def rule_stats(self):
        runtimes = defaultdict(list)
        for job, t in self.starttime.items():
            runtimes[job.rule].append(self.endtime[job] - t)
        for rule, runtimes in runtimes.items():
            yield (rule, sum(runtimes) / len(runtimes), min(runtimes), max(runtimes))

    @property
    def file_stats(self):
        for job, t in self.starttime.items():
            for f in job.expanded_output:
                start, stop = t, self.endtime[job]
                yield f, fmt_time(start), fmt_time(stop), stop - start, job

    @property
    def overall_runtime(self):
        if self.starttime and self.endtime:
            return max(self.endtime.values()) - min(self.starttime.values())
        else:
            return 0

[docs]    def to_json(self, path):
        rule_stats = {
            rule.name: {
                "mean-runtime": mean_runtime,
                "min-runtime": min_runtime,
                "max-runtime": max_runtime,
            }
            for rule, mean_runtime, min_runtime, max_runtime in self.rule_stats
        }
        file_stats = {
            f: {
                "start-time": start,
                "stop-time": stop,
                "duration": duration,
                "priority": job.priority
                if job.priority != snakemake.jobs.Job.HIGHEST_PRIORITY
                else "highest",
                "resources": dict(job.resources.items()),
            }
            for f, start, stop, duration, job in self.file_stats
        }

        with open(path, "w") as f:
            json.dump(
                {
                    "total_runtime": self.overall_runtime,
                    "rules": rule_stats,
                    "files": file_stats,
                },
                f,
                indent=4,
            )