---
title: "Benchmarks"
output: arl::arl_html_vignette
pkgdown:
  as_is: true
vignette: >
  %\VignetteIndexEntry{Benchmarks}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
arl::register_knitr_engine()

# Read benchmark data checked out by `make bench-data` (or `make vignettes`).
bench_json <- tryCatch({
  data_file <- "benchmarks/results/data.js"
  if (!file.exists(data_file)) data_file <- file.path("..", data_file)
  raw <- readLines(data_file, warn = FALSE)
  json_text <- sub("^window\\.BENCHMARK_DATA = ", "", paste(raw, collapse = "\n"))
  jsonlite::fromJSON(json_text)
}, error = function(e) NULL)

if (!is.null(bench_json)) {
  runs <- bench_json$entries$Benchmark
  # runs is a data.frame; benches column is a list of data.frames
  n_runs <- nrow(runs)
  benches <- runs$benches[[n_runs]]
  # Also grab the first run for before/after comparisons
  if (n_runs > 1) {
    first_benches <- runs$benches[[1L]]
  } else {
    first_benches <- NULL
  }
}
has_bench_data <- !is.null(bench_json)
```

This page summarizes Arl's performance characteristics.
For full profiling details and optimization history, see
[`benchmarks/PERFORMANCE.md`](https://github.com/wwbrannon/arl/blob/main/benchmarks/PERFORMANCE.md)
in the repository.

## Pipeline Overview

Every Arl expression passes through five stages:

```
Source → Tokenizer → Parser → Macro Expander → Compiler → R eval() → Result
```

The table below shows where time is spent on each of the example programs
shipped with the package:

```{r pipeline-table, echo = FALSE, results = "asis"}
if (has_bench_data) {
  real_names <- c("fibonacci.arl", "quicksort.arl",
                  "graph-paths.arl", "macro-examples.arl")
  components <- c("tokenizer", "parser", "macro", "compile", "r-eval")
  comp_labels <- c("Tokenizer", "Parser", "Macro Expander", "Compiler", "R eval()")

  comp_vals <- sapply(components, function(comp) {
    vapply(real_names, function(wl) {
      idx <- which(benches$name == paste0(comp, "/real/", wl))
      if (length(idx) == 1) benches$value[idx] else NA_real_
    }, numeric(1))
  })

  totals <- rowSums(comp_vals, na.rm = TRUE)
  pct_mat <- 100 * comp_vals / totals

  fmt_cell <- function(ms, pct) {
    if (is.na(ms)) return("\u2014")
    sprintf("%.0f ms (%.0f%%)", ms, pct)
  }

  rows <- lapply(seq_along(real_names), function(i) {
    cells <- vapply(seq_along(components), function(j) {
      fmt_cell(comp_vals[i, j], pct_mat[i, j])
    }, character(1))
    data.frame(
      Workload = paste0("`", real_names[i], "`"),
      Tokenizer = cells[1],
      Parser = cells[2],
      `Macro Expander` = cells[3],
      Compiler = cells[4],
      `R eval()` = cells[5],
      check.names = FALSE,
      stringsAsFactors = FALSE
    )
  })
  tbl <- do.call(rbind, rows)
  knitr::kable(tbl, format = "pipe")
} else {
  cat("*Benchmark data not available — rebuild with access to the `gh-pages` branch.*\n")
}
```

The compiler, R eval(), and macro expander together dominate runtime.
The tokenizer and parser are fast by comparison.

## End-to-End Timings

The same workloads measured end-to-end:

```{r e2e-table, echo = FALSE, results = "asis"}
if (has_bench_data) {
  e2e_idx <- grepl("^e2e/real/", benches$name)
  e2e <- benches[e2e_idx, , drop = FALSE]
  e2e$workload <- sub("^e2e/real/", "", e2e$name)

  tbl <- data.frame(
    Workload = paste0("`", e2e$workload, "`"),
    `Execution Time` = sprintf("~%.0f ms", e2e$value),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
  knitr::kable(tbl, format = "pipe")
} else {
  cat("*Benchmark data not available — rebuild with access to the `gh-pages` branch.*\n")
}
```

These are end-to-end times (i.e., tokenize + parse + expand + compile +
evaluate), and also include engine startup and module loading.

## Optimizations Applied

Three O(n²) bottlenecks were identified by code inspection and fixed:

1. **Tokenizer string accumulation** — character-by-character `c()` replaced
   with a regex-based approach that processes entire tokens in one pass,
   eliminating per-character overhead entirely.

2. **Parser list growing** — repeated `c(elements, list(elem))` replaced
   with chunked collection. ~9× improvement for large flat lists
   (1000 elements: ~60 ms → 6.9 ms).

3. **CPS overhead removed** — the evaluator was converted from
   continuation-passing style with trampolines to a compiler that emits
   R code evaluated by R's native `eval()`, removing per-expression
   closure allocation.

```{r before-after, echo = FALSE, results = "asis"}
if (has_bench_data && !is.null(first_benches)) {
  # Compare first run (baseline) to latest run for e2e/real workloads
  e2e_names <- benches$name[grepl("^e2e/real/", benches$name)]
  rows <- lapply(e2e_names, function(nm) {
    before <- first_benches$value[first_benches$name == nm]
    after  <- benches$value[benches$name == nm]
    if (length(before) == 1 && length(after) == 1) {
      data.frame(
        Workload = paste0("`", sub("^e2e/real/", "", nm), "`"),
        Before = sprintf("~%.0f ms", before),
        After = sprintf("~%.0f ms", after),
        Speedup = sprintf("%.1fx", before / after),
        check.names = FALSE,
        stringsAsFactors = FALSE
      )
    }
  })
  tbl <- do.call(rbind, rows)
  if (!is.null(tbl) && nrow(tbl) > 0) {
    cat("\nBefore/after comparison (first recorded run vs latest):\n\n")
    print(knitr::kable(tbl, format = "pipe"))
  }
} else if (!has_bench_data) {
  cat("*Benchmark data not available.*\n")
}
```

All fixes preserve correctness (full test suite passes) and significantly
improve performance on most code, as well as preventing worst-case blowups.

## Benchmark Data

Historical benchmark results are stored on the `gh-pages` branch in
`dev/bench/data.js`. You can inspect them with:

```bash
git show gh-pages:dev/bench/data.js
```

Each entry records a benchmark run with commit metadata and per-benchmark
timings. The tables on this page are generated from the latest run.

## Running Benchmarks

From the repository root:

```bash
make bench
```

This runs the full benchmark suite (component-level and end-to-end) and
saves results to `benchmarks/results/`. Individual component benchmarks
are also available:

```bash
# COMPONENT can be tokenizer, parser, macro, compile, r-eval, stdlib, e2e
make bench-component COMPONENT=tokenizer
```

Profiling reports (HTML flame graphs) can be generated with:

```bash
make profile

# View: open benchmarks/profiles/eval-fibonacci.html
```