// x/pretty_benching/pretty_benchmark_history.ts
// 🦕 A small lib to make your Deno benchmarking progress and results look pretty
import type { BenchmarkResult, BenchmarkRunResult } from "./deps.ts";
import type { Threshold, Thresholds } from "./types.ts";
/** Defines what the history should contain and how.
 *
 * @template T The type that is calculated with the `benchExtras` function and stored in each benchmark's `extras`.
 * @template K The type that is calculated with the `runExtras` function and stored in each run's `runExtras`.
 */
export interface prettyBenchmarkHistoryOptions<T = unknown, K = unknown> {
  /** Allow storing low-precision measurements. */
  easeOnlyHrTime?: boolean;
  /** Turns on strict mode. Setting it to boolean `true` gives the same result as setting each rule to `true` in the rules `object`. */
  strict?: boolean | strictHistoryRules;
  /** Throw an error when **any** benchmark has a lower `runsCount` than the set value. */
  minRequiredRuns?: number;
  /** Saves the `measuredRunsMs` array for each benchmark.
   *
   * **WARNING** this could result in a very big history file over time.
   *
   * Consider calculating the necessary values before saving instead, with `benchExtras` or `runExtras`. */
  saveIndividualRuns?: boolean;
  /** Saves the returned `object` for each benchmark into its `extras` property. */
  benchExtras?: (result: BenchmarkResult) => T;
  /** Saves the returned `object` for each run into its `runExtras` property. */
  runExtras?: (runResult: BenchmarkRunResult) => K;
}
/** Defines which strict rules to use. */
export type strictHistoryRules = {
  /** Throw an error when a previously saved benchmark is missing from the current set when calling `addResults`. Ignored on the very first set of benchmarks. */
  noRemoval?: boolean;
  /** Throw an error when a previously unsaved benchmark is added to the current set when calling `addResults`. Ignored on the very first set of benchmarks. */
  noAddition?: boolean;
  /** Throw an error when the `runsCount` of a benchmark changes from the previous run's `runsCount`. Ignored on new benchmarks. */
  noRunsCountChange?: boolean;
};
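/* A minimal options sketch. The `maxMs` and `benchCount` extras are hypothetical
 * names, shown only to illustrate how `benchExtras` / `runExtras` feed the `T`
 * and `K` type parameters:
 *
 * const options: prettyBenchmarkHistoryOptions<
 *   { maxMs: number },
 *   { benchCount: number }
 * > = {
 *   strict: { noRemoval: true, noRunsCountChange: true }, // adding new benches stays allowed
 *   minRequiredRuns: 10,
 *   // store derived values instead of every single measurement
 *   benchExtras: (result) => ({ maxMs: Math.max(...result.measuredRunsMs) }),
 *   runExtras: (runResult) => ({ benchCount: runResult.results.length }),
 * };
 */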
/** Represents the stored historic benchmark data.
 *
 * @template T The type that is calculated with the `benchExtras` function and stored in each benchmark's `extras`.
 * @template K The type that is calculated with the `runExtras` function and stored in each run's `runExtras`.
 */
export interface BenchmarkHistory<T = unknown, K = unknown> {
  /** The individual runs' values. */
  history: BenchmarkHistoryItem<T, K>[];
}
/** Represents the results of one `runBenchmarks` run.
 *
 * @template T The type that is calculated with the `benchExtras` function and stored in each benchmark's `extras`.
 * @template K The type that is calculated with the `runExtras` function and stored in each run's `runExtras`.
 */
export interface BenchmarkHistoryItem<T = unknown, K = unknown> {
  /** The date of the measurement. */
  date: string;
  /** User-provided identifier for the run. */
  id?: string;
  /** The object calculated by the `runExtras` function, if provided in the options. */
  runExtras?: K;

  /** The individual benchmarks' results for the specific run. */
  benchmarks: {
    [key: string]: BenchmarkHistoryRunItem<T>;
  };
}
/** Represents the results of one benchmark's single run.
 *
 * @template T The type that is calculated with the `benchExtras` function and stored in each benchmark's `extras`.
 */
export interface BenchmarkHistoryRunItem<T = unknown> {
  /** The total time it took to run a given benchmark. */
  totalMs: number;
  /** How many times the benchmark was run in succession. */
  runsCount: number;
  /** The average time of running the benchmark, in milliseconds. */
  measuredRunsAvgMs: number;
  /** The individual measurements, in milliseconds, it took to run the benchmark.
   *
   * Only saved when `saveIndividualRuns` is set in the options. */
  measuredRunsMs?: number[];
  /** The object calculated by the `benchExtras` function, if provided in the options. */
  extras?: T;
}
/** Represents the change in a value. */
export interface Delta {
  /** The relative change, as a ratio of the previous value (`0.1` means a 10% increase). */
  percent: number;
  /** The absolute change. */
  amount: number;
}
/** A key a `Delta` can be calculated for: one of the built-in measured values, or a `number` property of the calculated `extras`. */
export type DeltaKey<T = unknown> = (keyof T | "measuredRunsAvgMs" | "totalMs");
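/* A worked example of a `Delta`: if the previous `measuredRunsAvgMs` was 2.0 ms
 * and the current one is 2.3 ms, the calculated delta is
 * `{ amount: 0.3, percent: 0.15 }` (a 15% slowdown). With a `benchExtras` that
 * returns `{ maxMs: number }` (a hypothetical extra), a valid key list would be:
 *
 * const keys: DeltaKey<{ maxMs: number }>[] = ["measuredRunsAvgMs", "maxMs"];
 */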
/** Handles and enforces the set rules on the historic benchmarking data.
 *
 * Typical usage:
 * ```ts
 * // add benches, then
 *
 * let historicData;
 * try {
 *   historicData = JSON.parse(Deno.readTextFileSync("./benchmarks/history.json"));
 * } catch {
 *   // Decide whether you want to proceed with no history
 *   console.warn("⚠ can't read history file");
 * }
 *
 * const history = new prettyBenchmarkHistory(historicData, {
 *   // options
 * });
 *
 * runBenchmarks().then((results: BenchmarkRunResult) => {
 *   history.addResults(results);
 *   Deno.writeTextFileSync("./benchmarks/history.json", history.getDataString());
 * });
 * ```
 *
 * **Note**
 *
 * Saving and loading the generated data is the user's responsibility; this class does no file handling. See the examples for more info. */
export class prettyBenchmarkHistory<T = unknown, K = unknown> {
  private data!: BenchmarkHistory<T, K>;
  private options?: prettyBenchmarkHistoryOptions<T, K>;
  constructor(
    /** The previously saved historic data. */
    previousData?: BenchmarkHistory<T, K>,
    options?: prettyBenchmarkHistoryOptions<T, K>,
  ) {
    this.options = options;

    if (previousData) {
      this.load(previousData);
    } else {
      this.init();
    }
  }
  private init() {
    this.data = { history: [] };
  }

  private load(previousData: BenchmarkHistory<T, K>) {
    this.data = previousData;
  }
  /** Stores the run's results in the historic data, enforcing all of the set rules on them. */
  addResults(runResults: BenchmarkRunResult, options?: {
    /** Helps to identify the specific run, besides the date. */
    id?: string;
    /** Overrides the current date. */
    date?: Date | string;
  }) {
    const date: string = options?.date
      ? (typeof options.date === "string"
        ? options.date
        : options.date.toISOString())
      : new Date().toISOString();
    const duplicateNames = runResults.results.filter((r) =>
      runResults.results.filter((rc) => rc.name === r.name).length > 1
    );
    if (duplicateNames.length !== 0) {
      throw new Error(
        `Names must be unique to be able to store them. Colliding names: [${
          [...new Set(duplicateNames.map((b) => b.name)).values()].join(", ")
        }].`,
      );
    }
    if (this.options?.minRequiredRuns) {
      const minRuns = this.options.minRequiredRuns;
      const notEnoughRuns = runResults.results.filter((r) =>
        r.runsCount < minRuns || r.measuredRunsMs.length < minRuns
      );
      if (notEnoughRuns.length !== 0) {
        throw new Error(
          `The minimum required number of runs (${minRuns}) was not fulfilled by benchmarks: [${
            notEnoughRuns.map((r) => `"${r.name}" (${r.runsCount})`).join(", ")
          }]. The minimum required runs can be set with the 'minRequiredRuns' option.`,
        );
      }
    }
    if (!this.options?.easeOnlyHrTime) {
      const isHrTime = (ms: number) => ms % 1 !== 0;
      if (runResults.results.some((r) => !isHrTime(r.totalMs))) {
        // TODO consider: check on a subset of measurements too.
        throw new Error(
          `It seems you are trying to add results that were measured without the --allow-hrtime flag. You can bypass this check with the 'easeOnlyHrTime' option.`,
        );
      }
    }
    if (this.options?.strict) {
      const strictIsBooleanTrue = typeof this.options?.strict === "boolean" &&
        this.options?.strict;

      const hasDataAlready = this.data.history.length !== 0;
      if (hasDataAlready) { // strict has no effect on the first set of results.
        const errors: string[] = [];

        const prevBenchmarks = this.getBenchmarkNames();

        prevBenchmarks.forEach((pb) => {
          const benchInResults = runResults.results.find((r) => r.name === pb);

          if (
            strictIsBooleanTrue ||
            (this.options?.strict as strictHistoryRules).noRemoval
          ) {
            if (!benchInResults) {
              errors.push(
                `Missing benchmark named "${pb}" in current results. Set 'strict' or 'strict.noRemoval' option to false to bypass this check.`,
              );
            }
          }

          if (
            strictIsBooleanTrue ||
            (this.options?.strict as strictHistoryRules).noRunsCountChange
          ) {
            const prevRuns = this.data.history.filter((h) => h.benchmarks[pb]);
            if (benchInResults && prevRuns.length > 0) {
              const lastRun = prevRuns[prevRuns.length - 1].benchmarks[pb];
              if (lastRun.runsCount !== benchInResults.runsCount) {
                errors.push(
                  `Runs count of benchmark "${pb}" (${benchInResults.runsCount}) doesn't match the previous runs count (${lastRun.runsCount}). Set 'strict' or 'strict.noRunsCountChange' option to false to bypass this check.`,
                );
              }
            }
          }
        });

        if (
          strictIsBooleanTrue ||
          (this.options?.strict as strictHistoryRules).noAddition
        ) {
          const newBenches = runResults.results.filter((r) =>
            prevBenchmarks.indexOf(r.name) === -1
          );
          if (newBenches.length !== 0) {
            errors.push(
              `Adding new benches is not allowed after the initial set of benchmarks. New benches: [${
                newBenches.map((b) => b.name).join(", ")
              }]. Set 'strict' or 'strict.noAddition' option to false to bypass this check.`,
            );
          }
        }

        // TODO consider: checking changes in extras

        if (errors.length !== 0) {
          throw new Error(
            `Errors while trying to add new results to history: \n${
              errors.join("\n")
            }`,
          );
        }
      }
    }
    const benchmarks: { [key: string]: BenchmarkHistoryRunItem<T> } = {};
    runResults.results.forEach((r) => {
      benchmarks[r.name] = {
        measuredRunsAvgMs: r.measuredRunsAvgMs,
        runsCount: r.runsCount,
        totalMs: r.totalMs,
        measuredRunsMs: this.options?.saveIndividualRuns
          ? r.measuredRunsMs
          : undefined,
        extras: this.options?.benchExtras && this.options.benchExtras(r),
      };
    });
    this.data.history.push({
      date: date,
      id: options?.id,
      runExtras: this.options?.runExtras && this.options.runExtras(runResults),
      benchmarks: benchmarks,
    });
    // TODO! can't parse the date if a different date format is used in the string
    this.data.history = this.data.history.sort((a, b) => {
      return new Date(a.date).getTime() - new Date(b.date).getTime();
    });

    return this;
  }
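  /* A usage sketch for `addResults`, assuming `runBenchmarks` from Deno std's
   * bench module; the file path and run id are made up:
   *
   * const results = await runBenchmarks({ silent: true });
   * history.addResults(results, { id: "v1.2.0" }); // throws if any set rule is violated
   * Deno.writeTextFileSync("./benchmarks/history.json", history.getDataString());
   */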
  /** Calculates `deltas` for each benchmark in the provided `BenchmarkRunResult`, for each provided property key.
   *
   * Keys are either `measuredRunsAvgMs`, `totalMs`, or point to `number` properties of the calculated `extras`.
   * An error is thrown when a key points to a non-number property.
   * No delta is calculated for keys which are not present in the `extras`.
   *
   * Benchmarks that have no history yet are omitted from the returned object. */
  getDeltasFrom(
    results: BenchmarkRunResult,
    keys: DeltaKey<T>[] = ["measuredRunsAvgMs", "totalMs"],
  ): { [key: string]: { [key: string]: Delta } } {
    const deltas: { [key: string]: { [key: string]: Delta } } = {};
    results.results.forEach((r) => {
      const d = this.getDeltaForBenchmark(r, keys);
      if (d) {
        deltas[r.name] = d;
      }
    });

    return deltas;
  }
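  /* A sketch of consuming the returned deltas; negative values mean the
   * benchmark got faster:
   *
   * const deltas = history.getDeltasFrom(results, ["measuredRunsAvgMs"]);
   * for (const [name, d] of Object.entries(deltas)) {
   *   const { amount, percent } = d.measuredRunsAvgMs;
   *   console.log(`${name}: ${amount.toFixed(3)} ms (${(percent * 100).toFixed(1)}%)`);
   * }
   */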
  /** Calculates `deltas` for the given `BenchmarkResult`, for each provided property key.
   *
   * Keys are either `measuredRunsAvgMs`, `totalMs`, or point to `number` properties of the calculated `extras`.
   * An error is thrown when a key points to a non-number property.
   * No delta is calculated for keys which are not present in the `extras`.
   *
   * Returns `false` when there is no history for the given benchmark. */
  getDeltaForBenchmark(
    result: BenchmarkResult,
    keys: DeltaKey<T>[] = ["measuredRunsAvgMs", "totalMs"],
  ) {
    const prevResults = this.data.history.filter((h) =>
      h.benchmarks[result.name]
    );
    const lastResult = prevResults.length > 0
      ? prevResults[prevResults.length - 1].benchmarks[result.name]
      : undefined;
    if (!lastResult) { // no previous result for this benchmark
      return false;
    }

    const currentResultExtras = this.options?.benchExtras &&
      this.options.benchExtras(result);
    // deno-lint-ignore no-explicit-any
    const calcDelta = (current: any, prev: any, key: any) => {
      if (typeof current[key] !== "number" || typeof prev[key] !== "number") {
        throw new Error(
          `Type of value selected by key "${key}" must be number`,
        );
      }

      const diff = current[key] - prev[key];
      const percDiff = diff / prev[key]; // ratio relative to the previous value

      return {
        percent: percDiff,
        amount: diff,
      };
    };
const deltas: { [key: string]: Delta } = {};
    keys.forEach((key) => {
      if (key === "measuredRunsAvgMs" || key === "totalMs") {
        deltas[key as string] = calcDelta(result, lastResult, key);
      } else {
        if (
          !currentResultExtras ||
          typeof currentResultExtras[key] === "undefined"
        ) {
          throw new Error(
            `No property named "${key}" in the calculated extras for the currently measured benchmark named "${result.name}".`,
          );
        }

        if (
          !lastResult.extras ||
          typeof lastResult.extras[key] === "undefined"
        ) {
          // TODO consider throwing
          return; // skip keys that are not present in the previous run's extras
        }

        deltas[key as string] = calcDelta(
          currentResultExtras,
          lastResult.extras,
          key,
        );
      }
    });

    return deltas;
  }
  /** Returns a copy of the historic data. */
  getData(): BenchmarkHistory<T, K> {
    // no complex objects are stored, so a JSON round-trip is enough for a deep copy
    return JSON.parse(JSON.stringify(this.data));
  }
  /** Returns the historic data as a pretty-printed JSON string. */
  getDataString() {
    return JSON.stringify(this.getData(), null, 2);
  }
  /** Returns the name of every benchmark present in the historic data. */
  getBenchmarkNames() {
    return [
      ...new Set(
        this.data.history.map((h) => Object.keys(h.benchmarks)).flat(),
      ),
    ];
  }
}
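/* A small sketch of inspecting the stored history directly (purely illustrative).
 * Note that `getData` returns a deep copy, so mutating the result does not
 * affect the instance:
 *
 * for (const name of history.getBenchmarkNames()) {
 *   const runs = history.getData().history.filter((h) => h.benchmarks[name]);
 *   console.log(`${name}: present in ${runs.length} run(s)`);
 * }
 */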
/** Calculates `Thresholds` from the historic data for each benchmark.
 *
 * **EXPERIMENTAL** The default way of calculating may change. If you rely on thresholds, provide your own calculation so it won't change unexpectedly.
 *
 * The default way the thresholds are calculated:
 * * a threshold is only calculated for a benchmark which has at least `5` previous runs
 * * `green` is the (minimum of the measured `measuredRunsAvgMs`) * `1.1`
 * * `yellow` is the (maximum of the measured `measuredRunsAvgMs`) * `1.2`
 *
 * This can be overridden with the options. */
export function calculateThresholds<T, K>(
  history: prettyBenchmarkHistory<T, K>,
  options?: {
    minProceedingRuns?: number;
    calculate?: (runs: BenchmarkHistoryItem<T, K>[]) => Threshold;
  },
): Thresholds {
  const benchmarkNames = history.getBenchmarkNames();
  const data = history.getData();
  const thresholds: Thresholds = {};
  benchmarkNames.forEach((bn) => {
    const runs = data.history.filter((h) => h.benchmarks[bn]);

    if (runs.length < (options?.minProceedingRuns ?? 5)) {
      return;
    }

    if (typeof options?.calculate === "function") {
      thresholds[bn] = options.calculate(runs);
    } else {
      const green = Math.min(
        ...runs.map((r) => r.benchmarks[bn].measuredRunsAvgMs),
      ) * 1.1;
      const yellow = Math.max(
        ...runs.map((r) => r.benchmarks[bn].measuredRunsAvgMs),
      ) * 1.2;

      thresholds[bn] = { green, yellow };
    }
  });

  return thresholds;
}
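/* A usage sketch: feeding the calculated thresholds into the lib's renderers.
 * The `prettyBenchmarkProgress` / `prettyBenchmarkResult` names are assumed
 * from this module's sibling files:
 *
 * const thresholds = calculateThresholds(history, { minProceedingRuns: 10 });
 * runBenchmarks(
 *   { silent: true },
 *   prettyBenchmarkProgress({ thresholds }),
 * ).then(prettyBenchmarkResult({ thresholds }));
 */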