From 8a8f91155bd5dc7c98b225b6de9d9f63832b8035 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Tue, 28 Oct 2025 14:36:58 +0000 Subject: [PATCH 1/3] Add example scripts to run pyperformance on generic hosts --- examples/benchmarking-scripts/README.md | 23 +++ examples/benchmarking-scripts/backfill.py | 63 ++++++++ .../benchmarking-scripts/backfill_shas.txt | 3 + .../benchmarking-scripts/benchmark.conf.in | 102 +++++++++++++ .../benchmarking-scripts/run-pyperformance.sh | 134 ++++++++++++++++++ 5 files changed, 325 insertions(+) create mode 100644 examples/benchmarking-scripts/README.md create mode 100644 examples/benchmarking-scripts/backfill.py create mode 100644 examples/benchmarking-scripts/backfill_shas.txt create mode 100644 examples/benchmarking-scripts/benchmark.conf.in create mode 100755 examples/benchmarking-scripts/run-pyperformance.sh diff --git a/examples/benchmarking-scripts/README.md b/examples/benchmarking-scripts/README.md new file mode 100644 index 00000000..7cc3c7ce --- /dev/null +++ b/examples/benchmarking-scripts/README.md @@ -0,0 +1,23 @@ +# Benchmarking Scripts Toolkit + +Companion assets for running `pyperformance` benchmarks on hosts that provide isolated CPUs and for backfilling historical CPython revisions to [speed.python.org](https://speed.python.org/). + +## Contents +- `run-pyperformance.sh` – shell wrapper that reserves an isolated CPU (175–191) via lockfiles, renders `benchmark.conf` from `benchmark.conf.in` with `m4`, sets up a virtual environment, and runs `pyperformance` with upload enabled. +- `benchmark.conf.in` – template consumed by the wrapper; placeholders `TMPDIR` and `CPUID` are filled in so each run has its own working tree, build directory, and CPU affinity. +- `backfill.py` – Python helper that reads revisions from `backfill_shas.txt` and launches multiple `run-pyperformance.sh` jobs in parallel, capturing stdout/stderr per revision under `output/`. 
+- `backfill_shas.txt` – example list of `sha=branch` pairs targeted by the backfill script. + +## Typical Workflow +1. Ensure kernel CPU isolation (`isolcpus=175-191`) and the `lockfile` utility are available so the wrapper can pin workloads without contention. +2. Invoke `./run-pyperformance.sh -- compile benchmark.conf ` for an ad-hoc run; the script installs `pyperformance==1.13.0`, clones CPython, and uploads results using the environment label configured in `benchmark.conf.in`. +3. Populate `backfill_shas.txt` with the revisions you want to replay and run `python backfill.py` to batch process them; individual logs land in `output/-.out|.err`. + +Adjust `benchmark.conf.in` if you need to change build parameters (PGO/LTO, job count, upload target, etc.). + +## Scheduled Runs +If you want a daily unattended run, drop an entry like this into `crontab -e` on the host: + +``` +0 0 * * * cd /home/user/pyperformance/examples/benchmarking-scripts && ./run-pyperformance.sh -- compile_all benchmark.conf > /home/user/pyperformance/cron.log 2>&1 +``` diff --git a/examples/benchmarking-scripts/backfill.py b/examples/benchmarking-scripts/backfill.py new file mode 100644 index 00000000..fbebe455 --- /dev/null +++ b/examples/benchmarking-scripts/backfill.py @@ -0,0 +1,63 @@ +import signal +import subprocess +from multiprocessing import Pool +from pathlib import Path + +""" +Parallel backfilling helper for pyperformance runs on isolated CPUs. + +Reads `sha=branch` pairs from backfill_shas.txt, invokes run-pyperformance.sh +for each revision, and lets that wrapper pin the workload to an isolated CPU, +materialize benchmark.conf, build CPython, and upload results to +speed.python.org. Stdout/stderr for each revision are captured under +output/-.(out|err). 
+""" + + +def get_revisions(): + revisions = [] + with open("backfill_shas.txt", "r") as f: + for line in f: + sha, branch = line.split("=") + revisions.append((sha, branch.rstrip())) + return revisions + + +def run_pyperformance(revision): + sha = revision[0] + branch = revision[1] + print(f"Running run-pyperformance.sh with sha: {sha}, branch: {branch}") + output_dir = Path("output") + output_dir.mkdir(parents=True, exist_ok=True) + out_file = output_dir / f"{branch}-{sha}.out" + err_file = output_dir / f"{branch}-{sha}.err" + with open(out_file, "w") as output, open(err_file, "w") as error: + subprocess.run([ + "./run-pyperformance.sh", + "-x", + "--", + "compile", + "benchmark.conf", + sha, + branch, + ], + stdout=output, + stderr=error, + ) + + +if __name__ == '__main__': + original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN) + pool = Pool(10) + signal.signal(signal.SIGINT, original_sigint_handler) + try: + res = pool.map_async(run_pyperformance, get_revisions()) + # Without the timeout this blocking call ignores all signals. + res.get(600) + except KeyboardInterrupt: + print("Caught KeyboardInterrupt, terminating workers") + pool.terminate() + else: + print("Normal termination") + pool.close() + pool.join() diff --git a/examples/benchmarking-scripts/backfill_shas.txt b/examples/benchmarking-scripts/backfill_shas.txt new file mode 100644 index 00000000..4dc650ee --- /dev/null +++ b/examples/benchmarking-scripts/backfill_shas.txt @@ -0,0 +1,3 @@ +5d2edf72d25c2616f0e13d10646460a8e69344fa=main +bd2c7e8c8b10f4d31eab971781de13844bcd07fe=main +29b38b7aae884c14085a918282ea7f0798ed7a2a=main diff --git a/examples/benchmarking-scripts/benchmark.conf.in b/examples/benchmarking-scripts/benchmark.conf.in new file mode 100644 index 00000000..f5eb545d --- /dev/null +++ b/examples/benchmarking-scripts/benchmark.conf.in @@ -0,0 +1,102 @@ +[config] +# Directory where JSON files are written. 
+# - uploaded files are moved to json_dir/uploaded/ +# - results of patched Python are written into json_dir/patch/ +json_dir = TMPDIR/json + +# If True, compile CPython in debug mode (LTO and PGO disabled), +# run benchmarks with --debug-single-sample, and disable upload. +# +# Use this option to quickly test a configuration. +debug = False + + +[scm] +# Directory of CPython source code (Git repository) +repo_dir = TMPDIR/cpython + +# Update the Git repository (git fetch)? +update = True + +# Name of the Git remote, used to create revision of +# the Git branch. For example, use revision 'remotes/origin/3.6' +# for the branch '3.6'. +git_remote = remotes/origin + + +[compile] +# Create files into bench_dir: +# - bench_dir/bench-xxx.log +# - bench_dir/prefix/: where Python is installed +# - bench_dir/venv/: Virtual environment used by pyperformance +bench_dir = TMPDIR/bench_tmpdir + +# Link Time Optimization (LTO)? +lto = True + +# Profiled Guided Optimization (PGO)? +pgo = True + +# The space-separated list of libraries that are package-only, +# i.e., locally installed but not on header and library paths. +# For each such library, determine the install path and add an +# appropriate subpath to CFLAGS and LDFLAGS declarations passed +# to configure. As an exception, the prefix for openssl, if that +# library is present here, is passed via the --with-openssl +# option. Currently, this only works with Homebrew on macOS. +# If running on macOS with Homebrew, you probably want to use: +# pkg_only = openssl readline sqlite3 xz zlib +# The version of zlib shipping with macOS probably works as well, +# as long as Apple's SDK headers are installed. +pkg_only = + +# Install Python? If false, run Python from the build directory +# +# WARNING: Running Python from the build directory introduces subtle changes +# compared to running an installed Python. 
Moreover, creating a virtual +# environment using a Python run from the build directory fails in many cases, +# especially on Python older than 3.4. Only disable installation if you +# really understand what you are doing! +install = True + +# Specify '-j' parameter in 'make' command +jobs = 32 + +[run_benchmark] +# Run "sudo python3 -m pyperf system tune" before running benchmarks? +system_tune = False + +# --manifest option for 'pyperformance run' +manifest = + +# --benchmarks option for 'pyperformance run' +benchmarks = + +# --affinity option for 'pyperf system tune' and 'pyperformance run' +affinity = CPUID + +# Upload generated JSON file? +# +# Upload is disabled on patched Python, in debug mode or if install is +# disabled. +upload = True + +# Configuration to upload results to a Codespeed website +[upload] +url = https://speed.python.org/ +# environment-name should be created on speed.python.org +environment = environment-name +executable = lto-pgo +project = CPython + +[compile_all] +# List of CPython Git branches +branches = main + + +# List of revisions to benchmark by compile_all +[compile_all_revisions] +# list of 'sha1=' (default branch: 'master') or 'sha1=branch' +# used by the "pyperformance compile_all" command +# e.g.: +# 11159d2c9d6616497ef4cc62953a5c3cc8454afb = diff --git a/examples/benchmarking-scripts/run-pyperformance.sh b/examples/benchmarking-scripts/run-pyperformance.sh new file mode 100755 index 00000000..361790fd --- /dev/null +++ b/examples/benchmarking-scripts/run-pyperformance.sh @@ -0,0 +1,134 @@ +#!/bin/bash + + +# Wrapper around pyperformance for hosts with isolated CPUs. Reserves a CPU +# (175-191) via lockfiles, renders benchmark.conf with m4, bootstraps a venv, +# and runs pyperformance pinned to that CPU. Requires kernel isolcpus=175-191 +# and the lockfile utility so concurrent runs do not collide, which is +# especially helpful when backfilling multiple revisions. 
+ + +set -e +set -u +set -o pipefail + +lock_file= +tmpdir= +cleanup() +{ + if [[ -n "${lock_file:-}" ]]; then + echo "Removing $lock_file" + rm -f "$lock_file" + fi + if [[ -n "${tmpdir:-}" ]]; then + echo "Removing $tmpdir" + rm -fr "$tmpdir" + fi + exit +} + +trap cleanup EXIT + +usage() +{ + cat < "$tmpdir/benchmark.conf" + +# This is our working directory from now on +cd "$tmpdir" + +# Install pyperformance in a virtual env. +python3 -m venv venv +venv/bin/pip install pyperformance==1.13.0 + +# Clone cpython +git clone https://github.com/python/cpython.git + +# Run pyperformance +venv/bin/pyperformance "$@" From 1980788fa0cefdd621c6c128f43cc0df2b3fc9b8 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Tue, 28 Oct 2025 16:51:28 +0000 Subject: [PATCH 2/3] Fix ruff formatting --- examples/benchmarking-scripts/backfill.py | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/examples/benchmarking-scripts/backfill.py b/examples/benchmarking-scripts/backfill.py index fbebe455..1f400c79 100644 --- a/examples/benchmarking-scripts/backfill.py +++ b/examples/benchmarking-scripts/backfill.py @@ -32,21 +32,22 @@ def run_pyperformance(revision): out_file = output_dir / f"{branch}-{sha}.out" err_file = output_dir / f"{branch}-{sha}.err" with open(out_file, "w") as output, open(err_file, "w") as error: - subprocess.run([ - "./run-pyperformance.sh", - "-x", - "--", - "compile", - "benchmark.conf", - sha, - branch, - ], - stdout=output, - stderr=error, + subprocess.run( + [ + "./run-pyperformance.sh", + "-x", + "--", + "compile", + "benchmark.conf", + sha, + branch, + ], + stdout=output, + stderr=error, ) -if __name__ == '__main__': +if __name__ == "__main__": original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN) pool = Pool(10) signal.signal(signal.SIGINT, original_sigint_handler) From 20264d535f6f118d0f69a072588850a3e31a195f Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Wed, 29 Oct 2025 14:09:49 +0000 Subject: [PATCH 
3/3] Address feedback --- examples/benchmarking-scripts/backfill.py | 4 ++-- examples/benchmarking-scripts/benchmark.conf.in | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/benchmarking-scripts/backfill.py b/examples/benchmarking-scripts/backfill.py index 1f400c79..f74f2efd 100644 --- a/examples/benchmarking-scripts/backfill.py +++ b/examples/benchmarking-scripts/backfill.py @@ -49,12 +49,12 @@ def run_pyperformance(revision): if __name__ == "__main__": original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN) - pool = Pool(10) + pool = Pool(8) signal.signal(signal.SIGINT, original_sigint_handler) try: res = pool.map_async(run_pyperformance, get_revisions()) # Without the timeout this blocking call ignores all signals. - res.get(600) + res.get(86400) except KeyboardInterrupt: print("Caught KeyboardInterrupt, terminating workers") pool.terminate() diff --git a/examples/benchmarking-scripts/benchmark.conf.in b/examples/benchmarking-scripts/benchmark.conf.in index f5eb545d..b8c73734 100644 --- a/examples/benchmarking-scripts/benchmark.conf.in +++ b/examples/benchmarking-scripts/benchmark.conf.in @@ -60,7 +60,7 @@ pkg_only = install = True # Specify '-j' parameter in 'make' command -jobs = 32 +jobs = 24 [run_benchmark] # Run "sudo python3 -m pyperf system tune" before running benchmarks? @@ -96,7 +96,7 @@ branches = main # List of revisions to benchmark by compile_all [compile_all_revisions] -# list of 'sha1=' (default branch: 'master') or 'sha1=branch' +# list of 'sha1=' (default branch: 'main') or 'sha1=branch' # used by the "pyperformance compile_all" command # e.g.: # 11159d2c9d6616497ef4cc62953a5c3cc8454afb =