diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml
index 4f41630638..cd2397a189 100644
--- a/.github/workflows/pr-tests.yml
+++ b/.github/workflows/pr-tests.yml
@@ -16,7 +16,6 @@ concurrency:
 env:
   CARGO_TERM_COLOR: always
   POWDR_GENERATE_PROOFS: "true"
-  POWDR_JIT_OPT_LEVEL: "0"
   MAX_DEGREE_LOG: "20"
 
 jobs:
diff --git a/executor/src/witgen/jit/block_machine_processor.rs b/executor/src/witgen/jit/block_machine_processor.rs
index 9aa9303fc7..c691b0d51a 100644
--- a/executor/src/witgen/jit/block_machine_processor.rs
+++ b/executor/src/witgen/jit/block_machine_processor.rs
@@ -1,20 +1,21 @@
-use std::collections::{BTreeSet, HashSet};
+use std::collections::HashSet;
 
 use bit_vec::BitVec;
 use itertools::Itertools;
-use powdr_ast::analyzed::{
-    AlgebraicReference, Identity, PolyID, PolynomialType, SelectedExpressions,
-};
+use powdr_ast::analyzed::AlgebraicReference;
 use powdr_number::FieldElement;
 
-use crate::witgen::{jit::effect::format_code, machines::MachineParts, FixedData};
+use crate::witgen::{jit::processor::Processor, machines::MachineParts, FixedData};
 
 use super::{
     effect::Effect,
-    variable::{Cell, Variable},
-    witgen_inference::{CanProcessCall, FixedEvaluator, Value, WitgenInference},
+    variable::Variable,
+    witgen_inference::{CanProcessCall, FixedEvaluator, WitgenInference},
 };
 
+/// This is a tuning value. It is the maximum nesting depth of branches in the JIT code.
+const BLOCK_MACHINE_MAX_BRANCH_DEPTH: usize = 6;
+
 /// A processor for generating JIT code for a block machine.
 pub struct BlockMachineProcessor<'a, T: FieldElement> {
     fixed_data: &'a FixedData<'a, T>,
@@ -74,204 +75,51 @@ impl<'a, T: FieldElement> BlockMachineProcessor<'a, T> {
             witgen.assign_variable(expr, self.latch_row as i32, Variable::Param(index));
         }
 
-        // Solve for the block witness.
-        // Fails if any machine call cannot be completed.
-        match self.solve_block(can_process, &mut witgen, connection.right) {
-            Ok(()) => Ok(witgen.finish()),
-            Err(e) => {
-                log::trace!("\nCode generation failed for connection:\n  {connection}");
-                let known_args_str = known_args
-                    .iter()
-                    .enumerate()
-                    .filter_map(|(i, b)| b.then_some(connection.right.expressions[i].to_string()))
-                    .join("\n  ");
-                log::trace!("Known arguments:\n  {known_args_str}");
-                log::trace!("Error:\n  {e}");
-                log::trace!(
-                    "The following code was generated so far:\n{}",
-                    format_code(witgen.code())
-                );
-                Err(format!("Code generation failed: {e}\nRun with RUST_LOG=debug to see the code generated so far."))
-            }
-        }
+        let identities = self.row_range().flat_map(move |row| {
+            self.machine_parts
+                .identities
+                .iter()
+                .map(move |&id| (id, row))
+        });
+        let requested_known = known_args
+            .iter()
+            .enumerate()
+            .filter_map(|(i, is_input)| (!is_input).then_some(Variable::Param(i)));
+        Processor::new(
+            self.fixed_data,
+            self,
+            identities,
+            self.block_size,
+            true,
+            requested_known,
+            BLOCK_MACHINE_MAX_BRANCH_DEPTH,
+        )
+        .generate_code(can_process, witgen)
+        .map_err(|e| {
+            let err_str = e.to_string_with_variable_formatter(|var| match var {
+                Variable::Param(i) => format!("{}", &connection.right.expressions[*i]),
+                _ => var.to_string(),
+            });
+            log::trace!("\nCode generation failed for connection:\n  {connection}");
+            let known_args_str = known_args
+                .iter()
+                .enumerate()
+                .filter_map(|(i, b)| b.then_some(connection.right.expressions[i].to_string()))
+                .join("\n  ");
+            log::trace!("Known arguments:\n  {known_args_str}");
+            log::trace!("Error:\n  {err_str}");
+            let shortened_error = err_str
+                .lines()
+                .take(10)
+                .format("\n  ");
+            format!("Code generation failed: {shortened_error}\nRun with RUST_LOG=trace to see the code generated so far.")
+        })
     }
 
     fn row_range(&self) -> std::ops::Range<i32> {
         // We iterate over all rows of the block +/- one row, so that we can also solve for non-rectangular blocks.
         -1..(self.block_size + 1) as i32
     }
-
-    /// Repeatedly processes all identities on all rows, until no progress is made.
-    /// Fails iff there are incomplete machine calls in the latch row.
-    fn solve_block<CanProcess: CanProcessCall<T> + Clone>(
-        &self,
-        can_process: CanProcess,
-        witgen: &mut WitgenInference<'a, T, &Self>,
-        connection_rhs: &SelectedExpressions<T>,
-    ) -> Result<(), String> {
-        let mut complete = HashSet::new();
-        for iteration in 0.. {
-            let mut progress = false;
-
-            for row in self.row_range() {
-                for id in &self.machine_parts.identities {
-                    if !complete.contains(&(id.id(), row)) {
-                        let result = witgen.process_identity(can_process.clone(), id, row);
-                        if result.complete {
-                            complete.insert((id.id(), row));
-                        }
-                        progress |= result.progress;
-                    }
-                }
-            }
-            if !progress {
-                log::trace!(
-                    "Finishing block machine witgen code generation after {iteration} iterations"
-                );
-                break;
-            }
-        }
-
-        for (index, expr) in connection_rhs.expressions.iter().enumerate() {
-            if !witgen.is_known(&Variable::Param(index)) {
-                return Err(format!(
-                    "Unable to derive algorithm to compute output value \"{expr}\""
-                ));
-            }
-        }
-
-        if let Err(e) = self.check_block_shape(witgen) {
-            // Fail hard, as this should never happen for a correctly detected block machine.
-            log::debug!(
-                "The following code was generated so far:\n{}",
-                format_code(witgen.code())
-            );
-            panic!("{e}");
-        }
-        self.check_incomplete_machine_calls(&complete)?;
-
-        Ok(())
-    }
-
-    /// After solving, the known values should be such that we can stack different blocks.
-    fn check_block_shape(&self, witgen: &mut WitgenInference<'a, T, &Self>) -> Result<(), String> {
-        let known_columns = witgen
-            .known_variables()
-            .iter()
-            .filter_map(|var| match var {
-                Variable::Cell(cell) => Some(cell.id),
-                _ => None,
-            })
-            .collect::<BTreeSet<_>>();
-
-        let can_stack = known_columns.iter().all(|column_id| {
-            // Increase the range by 1, because in row <block_size>,
-            // we might have processed an identity with next references.
-            let row_range = self.row_range();
-            let values = (row_range.start..(row_range.end + 1))
-                .map(|row| {
-                    witgen.value(&Variable::Cell(Cell {
-                        id: *column_id,
-                        row_offset: row,
-                        // Dummy value, the column name is ignored in the implementation
-                        // of Cell::eq, etc.
-                        column_name: "".to_string(),
-                    }))
-                })
-                .collect::<Vec<_>>();
-
-            // Two values that refer to the same row (modulo block size) are compatible if:
-            // - One of them is unknown, or
-            // - Both are concrete and equal
-            let is_compatible = |v1: Value<T>, v2: Value<T>| match (v1, v2) {
-                (Value::Unknown, _) | (_, Value::Unknown) => true,
-                (Value::Concrete(a), Value::Concrete(b)) => a == b,
-                _ => false,
-            };
-            // A column is stackable if all rows equal to each other modulo
-            // the block size are compatible.
-            let stackable = (0..(values.len() - self.block_size))
-                .all(|i| is_compatible(values[i], values[i + self.block_size]));
-
-            if !stackable {
-                let column_name = self.fixed_data.column_name(&PolyID {
-                    id: *column_id,
-                    ptype: PolynomialType::Committed,
-                });
-                let block_list = values.iter().skip(1).take(self.block_size).join(", ");
-                let column_str = format!(
-                    "... {} | {} | {} ...",
-                    values[0],
-                    block_list,
-                    values[self.block_size + 1]
-                );
-                log::error!("Column {column_name} is not stackable:\n{column_str}");
-            }
-
-            stackable
-        });
-
-        match can_stack {
-            true => Ok(()),
-            false => Err("Block machine shape does not allow stacking".to_string()),
-        }
-    }
-
-    /// If any machine call could not be completed, that's bad because machine calls typically have side effects.
-    /// So, the underlying lookup / permutation / bus argument likely does not hold.
-    /// This function checks that all machine calls are complete, at least for a window of <block_size> rows.
-    fn check_incomplete_machine_calls(&self, complete: &HashSet<(u64, i32)>) -> Result<(), String> {
-        let machine_calls = self
-            .machine_parts
-            .identities
-            .iter()
-            .filter(|id| is_machine_call(id));
-
-        let incomplete_machine_calls = machine_calls
-            .flat_map(|call| {
-                let complete_rows = self
-                    .row_range()
-                    .filter(|row| complete.contains(&(call.id(), *row)))
-                    .collect::<Vec<_>>();
-                // Because we process rows -1..block_size+1, it is fine to have two incomplete machine calls,
-                // as long as <block_size> consecutive rows are complete.
-                if complete_rows.len() >= self.block_size {
-                    let (min, max) = complete_rows.iter().minmax().into_option().unwrap();
-                    let is_consecutive = max - min == complete_rows.len() as i32 - 1;
-                    if is_consecutive {
-                        return vec![];
-                    }
-                }
-                self.row_range()
-                    .filter(|row| !complete.contains(&(call.id(), *row)))
-                    .map(|row| (call, row))
-                    .collect::<Vec<_>>()
-            })
-            .collect::<Vec<_>>();
-
-        if !incomplete_machine_calls.is_empty() {
-            Err(format!(
-                "Incomplete machine calls:\n  {}",
-                incomplete_machine_calls
-                    .iter()
-                    .map(|(identity, row)| format!("{identity} (row {row})"))
-                    .join("\n  ")
-            ))
-        } else {
-            Ok(())
-        }
-    }
-}
-
-fn is_machine_call<T>(identity: &Identity<T>) -> bool {
-    match identity {
-        Identity::Lookup(_)
-        | Identity::Permutation(_)
-        | Identity::PhantomLookup(_)
-        | Identity::PhantomPermutation(_)
-        | Identity::PhantomBusInteraction(_) => true,
-        Identity::Polynomial(_) | Identity::Connect(_) => false,
-    }
 }
 
 impl<T: FieldElement> FixedEvaluator<T> for &BlockMachineProcessor<'_, T> {
@@ -310,7 +158,10 @@ mod test {
     use crate::witgen::{
         data_structures::mutable_state::MutableState,
         global_constraints,
-        jit::{effect::Effect, test_util::read_pil},
+        jit::{
+            effect::{format_code, Effect},
+            test_util::read_pil,
+        },
         machines::{machine_extractor::MachineExtractor, KnownMachine, Machine},
         FixedData,
     };
@@ -395,11 +246,11 @@ params[2] = Add::c[0];"
             .err()
             .unwrap();
         assert!(err_str
-            .contains("Unable to derive algorithm to compute output value \"Unconstrained::c\""));
+            .contains("The following variables or values are still missing: Unconstrained::c"));
     }
 
     #[test]
-    #[should_panic = "Block machine shape does not allow stacking"]
+    #[should_panic = "Column NotStackable::a is not stackable in a 1-row block"]
     fn not_stackable() {
         let input = "
         namespace Main(256);
diff --git a/executor/src/witgen/jit/compiler.rs b/executor/src/witgen/jit/compiler.rs
index 582d2234b7..98e329eab6 100644
--- a/executor/src/witgen/jit/compiler.rs
+++ b/executor/src/witgen/jit/compiler.rs
@@ -76,8 +76,9 @@ pub fn compile_effects<T: FieldElement>(
 
     record_start("JIT-compilation");
     let start = std::time::Instant::now();
-    log::trace!("Calling cargo...");
-    let r = powdr_jit_compiler::call_cargo(&code);
+    let opt_level = 0;
+    log::trace!("Compiling the following code using optimization level {opt_level}:\n{code}");
+    let r = powdr_jit_compiler::call_cargo(&code, Some(opt_level));
     log::trace!("Done compiling, took {:.2}s", start.elapsed().as_secs_f32());
     record_end("JIT-compilation");
     let lib_path = r.map_err(|e| format!("Failed to compile generated code: {e}"))?;
diff --git a/executor/src/witgen/jit/mod.rs b/executor/src/witgen/jit/mod.rs
index c2b33e43f2..4839e76318 100644
--- a/executor/src/witgen/jit/mod.rs
+++ b/executor/src/witgen/jit/mod.rs
@@ -8,5 +8,6 @@ mod symbolic_expression;
 mod variable;
 pub(crate) mod witgen_inference;
 
+mod processor;
 #[cfg(test)]
 pub(crate) mod test_util;
diff --git a/executor/src/witgen/jit/processor.rs b/executor/src/witgen/jit/processor.rs
new file mode 100644
index 0000000000..576322ad5a
--- /dev/null
+++ b/executor/src/witgen/jit/processor.rs
@@ -0,0 +1,394 @@
+#![allow(dead_code)]
+use std::{
+    collections::{BTreeSet, HashSet},
+    fmt::{self, Display, Formatter, Write},
+};
+
+use itertools::Itertools;
+use powdr_ast::analyzed::{Identity, PolyID, PolynomialType};
+use powdr_number::FieldElement;
+
+use crate::witgen::FixedData;
+
+use super::{
+    effect::{format_code, Effect},
+    variable::{Cell, Variable},
+    witgen_inference::{BranchResult, CanProcessCall, FixedEvaluator, Value, WitgenInference},
+};
+
+/// A generic processor for generating JIT code.
+pub struct Processor<'a, T: FieldElement, FixedEval> {
+    fixed_data: &'a FixedData<'a, T>, // used to look up column names for error messages
+    /// An evaluator for fixed columns.
+    fixed_evaluator: FixedEval,
+    /// List of identities, each paired with the row offset to process it on.
+    identities: Vec<(&'a Identity<T>, i32)>,
+    /// The size of a block, in rows.
+    block_size: usize,
+    /// Whether the processor should check for correctly stackable block shapes.
+    check_block_shape: bool,
+    /// List of variables we want to be known at the end. If any of them cannot be
+    /// derived, code generation fails.
+    requested_known_vars: Vec<Variable>,
+    /// Maximum nesting depth of branches allowed in the generated code.
+    max_branch_depth: usize,
+}
+
+impl<'a, T: FieldElement, FixedEval: FixedEvaluator<T>> Processor<'a, T, FixedEval> {
+    pub fn new(
+        fixed_data: &'a FixedData<'a, T>,
+        fixed_evaluator: FixedEval,
+        identities: impl IntoIterator<Item = (&'a Identity<T>, i32)>,
+        block_size: usize,
+        check_block_shape: bool,
+        requested_known_vars: impl IntoIterator<Item = Variable>,
+        max_branch_depth: usize,
+    ) -> Self {
+        Self {
+            fixed_data,
+            fixed_evaluator,
+            identities: identities.into_iter().collect(), // collected eagerly into a `Vec`
+            block_size,
+            check_block_shape,
+            requested_known_vars: requested_known_vars.into_iter().collect(), // likewise
+            max_branch_depth,
+        }
+    }
+
+    /// Generates code from the initial inference state `witgen`, starting at branch
+    /// depth zero and with no identity marked as complete.
+    pub fn generate_code<CanProcess: CanProcessCall<T> + Clone>(
+        &self,
+        can_process: CanProcess,
+        witgen: WitgenInference<'a, T, FixedEval>,
+    ) -> Result<Vec<Effect<T, Variable>>, Error<'a, T>> {
+        self.generate_code_for_branch(can_process, witgen, HashSet::new(), 0)
+    }
+
+    /// Processes all identities until no progress is made. If all requested variables
+    /// are then known and all machine calls complete, returns the generated code.
+    /// Otherwise branches on the known variable with the smallest non-single-value range
+    /// and recurses; fails if there is none or `max_branch_depth` is reached.
+    fn generate_code_for_branch<CanProcess: CanProcessCall<T> + Clone>(
+        &self,
+        can_process: CanProcess,
+        mut witgen: WitgenInference<'a, T, FixedEval>,
+        mut complete: HashSet<(u64, i32)>,
+        branch_depth: usize,
+    ) -> Result<Vec<Effect<T, Variable>>, Error<'a, T>> {
+        self.process_until_no_progress(can_process.clone(), &mut witgen, &mut complete);
+
+        if self.check_block_shape {
+            // Check that the "spill" into the previous block is compatible
+            // with the "missing pieces" in the next block.
+            // If this is not the case, this is a hard error
+            // (i.e. cannot be fixed by runtime witgen) and thus we panic inside.
+            // We could do this only at the end of each branch, but it's a bit
+            // more convenient to do it here.
+            self.check_block_shape(&witgen);
+        }
+
+        // Check that we could derive all requested variables.
+        let missing_variables = self
+            .requested_known_vars
+            .iter()
+            .filter(|var| !witgen.is_known(var))
+            // Sort to get deterministic code.
+            .sorted()
+            .cloned()
+            .collect_vec();
+
+        let incomplete_machine_calls = self.incomplete_machine_calls(&complete);
+        if missing_variables.is_empty() && incomplete_machine_calls.is_empty() {
+            return Ok(witgen.code());
+        }
+
+        // We need to do some work, try to branch.
+        let most_constrained_var = witgen
+            .known_variables()
+            .iter()
+            .map(|var| (var, witgen.range_constraint(var)))
+            .filter(|(_, rc)| rc.try_to_single_value().is_none())
+            .sorted()
+            .min_by_key(|(_, rc)| rc.range_width())
+            .map(|(var, _)| var.clone());
+        let most_constrained_var = match most_constrained_var {
+            Some(var) if branch_depth < self.max_branch_depth => var,
+            unusable => {
+                // A missing variable takes precedence over the depth limit as the reason.
+                let reason = match unusable {
+                    None => ErrorReason::NoBranchVariable,
+                    Some(_) => ErrorReason::MaxBranchDepthReached(self.max_branch_depth),
+                };
+                let incomplete_identities = self
+                    .identities
+                    .iter()
+                    .filter(|(id, row_offset)| !complete.contains(&(id.id(), *row_offset)))
+                    .copied()
+                    .collect_vec();
+                return Err(Error {
+                    reason,
+                    code: witgen.code(),
+                    missing_variables,
+                    incomplete_identities,
+                });
+            }
+        };
+
+        log::debug!(
+            "Branching on variable {most_constrained_var} with range {} at depth {branch_depth}",
+            witgen.range_constraint(&most_constrained_var)
+        );
+
+        let BranchResult {
+            common_code,
+            condition,
+            branches: [first_branch, second_branch],
+        } = witgen.branch_on(&most_constrained_var);
+
+        // TODO Tuning: If this fails (or also if it does not generate progress right away),
+        // we could also choose a different variable to branch on.
+        let left_branch_code = self.generate_code_for_branch(
+            can_process.clone(),
+            first_branch,
+            complete.clone(),
+            branch_depth + 1,
+        )?;
+        let right_branch_code =
+            self.generate_code_for_branch(can_process, second_branch, complete, branch_depth + 1)?;
+
+        // If both branches produce the same code, the conditional itself is not needed.
+        let branch_code = if left_branch_code == right_branch_code {
+            left_branch_code
+        } else {
+            vec![Effect::Branch(condition, left_branch_code, right_branch_code)]
+        };
+        Ok(common_code.into_iter().chain(branch_code).collect())
+    }
+
+    /// Processes all identities that are not yet complete, again and again, until a full
+    /// pass makes no more progress. Completed identities are recorded in `complete`.
+    fn process_until_no_progress<CanProcess: CanProcessCall<T> + Clone>(
+        &self,
+        can_process: CanProcess,
+        witgen: &mut WitgenInference<'a, T, FixedEval>,
+        complete: &mut HashSet<(u64, i32)>,
+    ) {
+        let mut progress = true;
+        while progress {
+            progress = false;
+            // TODO At this point, we should call a function on `witgen`
+            // to propagate known concrete values across the identities
+            // to other known (but not concrete) variables.
+            for &(id, row_offset) in &self.identities {
+                let key = (id.id(), row_offset);
+                if !complete.contains(&key) {
+                    let result = witgen.process_identity(can_process.clone(), id, row_offset);
+                    progress |= result.progress;
+                    if result.complete {
+                        complete.insert(key);
+                    }
+                }
+            }
+        }
+    }
+
+    /// Returns the machine calls, with their row offset, that could not be completed.
+    /// Incomplete machine calls are bad because machine calls typically have side effects,
+    /// so the underlying lookup / permutation / bus argument likely does not hold.
+    /// A call passes if it is complete on at least <block_size> rows that are consecutive.
+    fn incomplete_machine_calls(&self, complete: &HashSet<(u64, i32)>) -> Vec<(&Identity<T>, i32)> {
+        self.identities
+            .iter()
+            .map(|(id, _)| id)
+            .filter(|id| is_machine_call(id))
+            .unique()
+            .flat_map(|&call| {
+                // Split the rows of this call by whether the call was completed there.
+                let is_complete = |row: &i32| complete.contains(&(call.id(), *row));
+                let (complete_rows, incomplete_rows): (Vec<i32>, Vec<i32>) = self
+                    .rows_for_identity(call)
+                    .into_iter()
+                    .partition(is_complete);
+                // We might process more rows than `self.block_size`, so we check
+                // that the complete calls are on consecutive rows.
+                if complete_rows.len() >= self.block_size {
+                    let (min, max) = complete_rows.iter().minmax().into_option().unwrap();
+                    // TODO instead of checking for consecutive rows, we could also check
+                    // that they "fit" the next block.
+                    // TODO actually I think that we should not allow more than block size
+                    // completed calls.
+                    if max - min == complete_rows.len() as i32 - 1 {
+                        return vec![];
+                    }
+                }
+                incomplete_rows
+                    .into_iter()
+                    .map(|row| (call, row))
+                    .collect::<Vec<_>>()
+            })
+            .collect::<Vec<_>>()
+    }
+
+    /// Returns the row offsets on which the given identity is processed.
+    ///
+    /// The offsets are returned in the order in which they appear in `self.identities`.
+    fn rows_for_identity(&self, identity: &Identity<T>) -> Vec<i32> {
+        self.identities
+            .iter()
+            // Keep only the entries that belong to the requested identity ...
+            .filter(|(id, _)| *id == identity)
+            // ... and report just their row offsets.
+            .map(|&(_, row_offset)| row_offset)
+            .collect()
+    }
+
+    /// Checks that the known cells allow stacking blocks: every known cell must be
+    /// compatible with the cell `block_size` rows later in the same column. Panics otherwise.
+    /// TODO the same is actually true for machine calls.
+    fn check_block_shape(&self, witgen: &WitgenInference<'a, T, FixedEval>) {
+        let known_columns: BTreeSet<_> = witgen
+            .known_variables()
+            .iter()
+            .filter_map(|var| match var {
+                Variable::Cell(cell) => Some(cell.id),
+                _ => None,
+            })
+            .collect(); // IDs of all columns that have at least one known cell
+        for column_id in known_columns {
+            let known_rows = witgen
+                .known_variables()
+                .iter()
+                .filter_map(|var| match var {
+                    Variable::Cell(cell) if cell.id == column_id => Some(cell.row_offset),
+                    _ => None,
+                })
+                .collect::<BTreeSet<_>>(); // row offsets at which this column is known
+
+            // Two values that refer to the same row (modulo block size) are compatible if:
+            // - One of them is unknown, or
+            // - Both are concrete and equal
+            let is_compatible = |v1: Value<T>, v2: Value<T>| match (v1, v2) {
+                (Value::Unknown, _) | (_, Value::Unknown) => true,
+                (Value::Concrete(a), Value::Concrete(b)) => a == b,
+                _ => false, // every other combination counts as a conflict
+            };
+            let cell_var = |row_offset| {
+                Variable::Cell(Cell {
+                    // Dummy value, the column name does not matter here.
+                    column_name: "".to_string(),
+                    id: column_id,
+                    row_offset,
+                })
+            };
+
+            // Compare each known row with the row one block size later. The first
+            // incompatible pair aborts with a listing of all known rows of the column.
+            for row in &known_rows {
+                let this_val = witgen.value(&cell_var(*row));
+                let next_block_val = witgen.value(&cell_var(row + self.block_size as i32));
+                if !is_compatible(this_val, next_block_val) {
+                    let column_name = self.fixed_data.column_name(&PolyID {
+                        id: column_id,
+                        ptype: PolynomialType::Committed,
+                    });
+                    let row_vals = known_rows
+                        .iter()
+                        .map(|&r| format!("  row {r}: {}\n", witgen.value(&cell_var(r))))
+                        .format("");
+                    panic!(
+                        "Column {column_name} is not stackable in a {}-row block, conflict in rows {row} and {}.\n{row_vals}",
+                        self.block_size,
+                        row + self.block_size as i32
+                    );
+                }
+            }
+        }
+    }
+}
+
+fn is_machine_call<T>(identity: &Identity<T>) -> bool {
+    match identity {
+        Identity::Lookup(_)
+        | Identity::Permutation(_)
+        | Identity::PhantomLookup(_)
+        | Identity::PhantomPermutation(_)
+        | Identity::PhantomBusInteraction(_) => true, // all of these count as machine calls
+        Identity::Polynomial(_) | Identity::Connect(_) => false, // not machine calls
+    }
+}
+
+pub struct Error<'a, T: FieldElement> {
+    /// Code generated up to the point of failure.
+    pub code: Vec<Effect<T, Variable>>,
+    pub reason: ErrorReason, // why no further branching was possible
+    /// Requested variables that could not be determined.
+    pub missing_variables: Vec<Variable>,
+    /// Identities (with their row offset) that were not completely processed.
+    /// Note that only submachine calls are required to be complete.
+    pub incomplete_identities: Vec<(&'a Identity<T>, i32)>,
+}
+
+pub enum ErrorReason {
+    NoBranchVariable, // no known variable with a non-single-value range was available
+    MaxBranchDepthReached(usize), // carries the configured maximum branch depth
+}
+
+impl<T: FieldElement> Display for Error<'_, T> {
+    /// Writes the same text as `to_string_with_variable_formatter`, printing each
+    /// variable using its own `Display` implementation.
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        let var_formatter = |var: &Variable| var.to_string();
+        let message = self.to_string_with_variable_formatter(var_formatter);
+        f.write_str(&message)
+    }
+}
+
+impl<T: FieldElement> Error<'_, T> {
+    /// Renders the error as a multi-line message: the reason, followed by the missing
+    /// variables and the incomplete identities (if any) and the code generated so far.
+    ///
+    /// Missing variables are rendered using `var_formatter`, so that callers can
+    /// substitute more descriptive names.
+    pub fn to_string_with_variable_formatter(
+        &self,
+        var_formatter: impl Fn(&Variable) -> String,
+    ) -> String {
+        let reason_str = match &self.reason {
+            ErrorReason::NoBranchVariable => "No variable available to branch on".to_string(),
+            ErrorReason::MaxBranchDepthReached(depth) => {
+                format!("Maximum branch depth of {depth} reached")
+            }
+        };
+
+        let mut message =
+            format!("Unable to derive algorithm to compute required values: {reason_str}.");
+        // Writing to a `String` never fails, hence the `unwrap`s below.
+        if !self.missing_variables.is_empty() {
+            write!(
+                message,
+                "\nThe following variables or values are still missing: {}",
+                self.missing_variables.iter().map(var_formatter).join(", ")
+            )
+            .unwrap();
+        }
+        if !self.incomplete_identities.is_empty() {
+            write!(
+                message,
+                "\nThe following identities have not been fully processed:\n{}",
+                self.incomplete_identities
+                    .iter()
+                    .map(|(id, row_offset)| format!("    {id} at row {row_offset}"))
+                    .format("\n")
+            )
+            .unwrap();
+        }
+        if self.code.is_empty() {
+            message.push_str("\nNo code generated so far.");
+        } else {
+            write!(message, "\nGenerated code so far:\n{}", format_code(&self.code)).unwrap();
+        }
+        message
+    }
+}
diff --git a/executor/src/witgen/jit/single_step_processor.rs b/executor/src/witgen/jit/single_step_processor.rs
index e64669a670..20f23f7e42 100644
--- a/executor/src/witgen/jit/single_step_processor.rs
+++ b/executor/src/witgen/jit/single_step_processor.rs
@@ -1,5 +1,4 @@
 #![allow(dead_code)]
-use std::collections::HashSet;
 
 use itertools::Itertools;
 use powdr_ast::analyzed::{AlgebraicReference, PolyID};
@@ -9,10 +8,14 @@ use crate::witgen::{machines::MachineParts, FixedData};
 
 use super::{
     effect::Effect,
+    processor::Processor,
     variable::{Cell, Variable},
-    witgen_inference::{BranchResult, CanProcessCall, FixedEvaluator, WitgenInference},
+    witgen_inference::{CanProcessCall, FixedEvaluator, WitgenInference},
 };
 
+/// This is a tuning value. It is the maximum nesting depth of branches in the JIT code.
+const SINGLE_STEP_MACHINE_MAX_BRANCH_DEPTH: usize = 6;
+
 /// A processor for generating JIT code that computes the next row from the previous row.
 pub struct SingleStepProcessor<'a, T: FieldElement> {
     fixed_data: &'a FixedData<'a, T>,
@@ -31,148 +34,42 @@ impl<'a, T: FieldElement> SingleStepProcessor<'a, T> {
         &self,
         can_process: CanProcess,
     ) -> Result<Vec<Effect<T, Variable>>, String> {
-        self.generate_code_for_branch(can_process, self.initialize_witgen(), Default::default())
-    }
-
-    pub fn generate_code_for_branch<CanProcess: CanProcessCall<T> + Clone>(
-        &self,
-        can_process: CanProcess,
-        mut witgen: WitgenInference<'a, T, NoEval>,
-        mut complete: HashSet<u64>,
-    ) -> Result<Vec<Effect<T, Variable>>, String> {
-        self.process_until_no_progress(can_process.clone(), &mut witgen, &mut complete);
-
-        // Check that we could derive all witness values in the next row.
-        let unknown_witnesses = self
-            .unknown_witness_cols_on_next_row(&witgen)
-            // Sort to get deterministic code.
+        let all_witnesses = self
+            .machine_parts
+            .witnesses
+            .iter()
+            .cloned()
             .sorted()
             .collect_vec();
-
-        let missing_identities = self.machine_parts.identities.len() - complete.len();
-        let code = if unknown_witnesses.is_empty() && missing_identities == 0 {
-            witgen.finish()
-        } else {
-            let Some(most_constrained_var) = witgen
-                .known_variables()
-                .iter()
-                .map(|var| (var, witgen.range_constraint(var)))
-                .filter(|(_, rc)| rc.try_to_single_value().is_none())
-                .sorted()
-                .min_by_key(|(_, rc)| rc.range_width())
-                .map(|(var, _)| var.clone())
-            else {
-                let incomplete_identities = self
-                    .machine_parts
-                    .identities
-                    .iter()
-                    .filter(|id| !complete.contains(&id.id()));
-                let column_errors = if unknown_witnesses.is_empty() {
-                    "".to_string()
-                } else {
-                    format!(
-                        "\nThe following columns are still missing: {}",
-                        unknown_witnesses
-                            .iter()
-                            .map(|wit| self.fixed_data.column_name(wit))
-                            .format(", ")
-                    )
-                };
-                let identity_errors = if missing_identities == 0 {
-                    "".to_string()
-                } else {
-                    format!(
-                        "\nThe following identities have not been fully processed:\n{}",
-                        incomplete_identities
-                            .map(|id| format!("    {id}"))
-                            .join("\n")
-                    )
-                };
-                return Err(format!(
-                    "Unable to derive algorithm to compute values for witness columns in the next row and\n\
-                    unable to branch on a variable.{column_errors}{identity_errors}",
-                ));
-            };
-
-            let BranchResult {
-                common_code,
-                condition,
-                branches: [first_branch, second_branch],
-            } = witgen.branch_on(&most_constrained_var.clone());
-
-            // TODO Tuning: If this fails (or also if it does not generate progress right away),
-            // we could also choose a different variable to branch on.
-            let left_branch_code =
-                self.generate_code_for_branch(can_process.clone(), first_branch, complete.clone())?;
-            let right_branch_code =
-                self.generate_code_for_branch(can_process, second_branch, complete)?;
-            if left_branch_code == right_branch_code {
-                common_code.into_iter().chain(left_branch_code).collect()
-            } else {
-                common_code
-                    .into_iter()
-                    .chain(std::iter::once(Effect::Branch(
-                        condition,
-                        left_branch_code,
-                        right_branch_code,
-                    )))
-                    .collect()
-            }
-        };
-        Ok(code)
-    }
-
-    fn initialize_witgen(&self) -> WitgenInference<'a, T, NoEval> {
         // All witness columns in row 0 are known.
-        let known_variables = self.machine_parts.witnesses.iter().map(|id| {
-            Variable::Cell(Cell {
-                column_name: self.fixed_data.column_name(id).to_string(),
-                id: id.id,
-                row_offset: 0,
-            })
+        let known_variables = all_witnesses.iter().map(|&id| self.cell(id, 0));
+        // We request that all witness columns in the next row (offset 1) become known.
+        let requested_known = all_witnesses.iter().map(|&id| self.cell(id, 1));
+        let identities = self.machine_parts.identities.iter().map(|&id| {
+            let row_offset = if id.contains_next_ref() { 0 } else { 1 }; // NOTE(review): the replaced loop had a TODO: wrong if intermediate columns are referenced - confirm.
+            (id, row_offset)
         });
-        WitgenInference::new(self.fixed_data, NoEval, known_variables)
-    }
-
-    fn process_until_no_progress<CanProcess: CanProcessCall<T> + Clone>(
-        &self,
-        can_process: CanProcess,
-        witgen: &mut WitgenInference<'a, T, NoEval>,
-        complete: &mut HashSet<u64>,
-    ) {
-        let mut progress = true;
-        while progress {
-            progress = false;
-
-            // TODO At this point, we should call a function on `witgen`
-            // to propagate known concrete values across the identities
-            // to other known (but not concrete) variables.
-
-            for id in &self.machine_parts.identities {
-                if complete.contains(&id.id()) {
-                    continue;
-                }
-                // TODO this is wrong if intermediate columns are referenced.
-                let row_offset = if id.contains_next_ref() { 0 } else { 1 };
-                let result = witgen.process_identity(can_process.clone(), id, row_offset);
-                progress |= result.progress;
-                if result.complete {
-                    complete.insert(id.id());
-                }
-            }
-        }
+        let block_size = 1;
+        let witgen = WitgenInference::new(self.fixed_data, NoEval, known_variables);
+
+        Processor::new(
+            self.fixed_data,
+            NoEval,
+            identities,
+            block_size,
+            false,
+            requested_known,
+            SINGLE_STEP_MACHINE_MAX_BRANCH_DEPTH,
+        )
+        .generate_code(can_process, witgen)
+        .map_err(|e| e.to_string())
     }
 
-    fn unknown_witness_cols_on_next_row<'b>(
-        &'b self,
-        witgen: &'b WitgenInference<'_, T, NoEval>,
-    ) -> impl Iterator<Item = &'b PolyID> + 'b {
-        self.machine_parts.witnesses.iter().filter(move |wit| {
-            !witgen.is_known(&Variable::Cell(Cell {
-                column_name: self.fixed_data.column_name(wit).to_string(),
-                id: wit.id,
-                row_offset: 1,
-            }))
+    fn cell(&self, id: PolyID, row_offset: i32) -> Variable {
+        Variable::Cell(Cell {
+            column_name: self.fixed_data.column_name(&id).to_string(),
+            id: id.id,
+            row_offset,
         })
     }
 }
@@ -236,7 +133,9 @@ mod test {
         let mutable_state = MutableState::new(machines.into_iter(), &|_| {
             Err("Query not implemented".to_string())
         });
+        // `generate_code` already reports errors as `String`, so its result can be
+        // returned without any further conversion.
         SingleStepProcessor::new(&fixed_data, machine_parts).generate_code(&mutable_state)
     }
 
     #[test]
@@ -255,8 +154,9 @@ mod test {
         let err = generate_single_step(input, "M").err().unwrap();
         assert_eq!(
             err.to_string(),
-            "Unable to derive algorithm to compute values for witness columns in the next row and\n\
-            unable to branch on a variable.\nThe following columns are still missing: M::Y"
+            "Unable to derive algorithm to compute required values: \
+            Maximum branch depth of 6 reached.\nThe following variables or values are still missing: M::Y[1]\n\
+            No code generated so far."
         );
     }
 
diff --git a/executor/src/witgen/jit/witgen_inference.rs b/executor/src/witgen/jit/witgen_inference.rs
index d497f26bfc..915e187de8 100644
--- a/executor/src/witgen/jit/witgen_inference.rs
+++ b/executor/src/witgen/jit/witgen_inference.rs
@@ -88,14 +88,10 @@ impl<'a, T: FieldElement, FixedEval: FixedEvaluator<T>> WitgenInference<'a, T, F
         }
     }
 
-    pub fn finish(self) -> Vec<Effect<T, Variable>> {
+    pub fn code(self) -> Vec<Effect<T, Variable>> {
         self.code
     }
 
-    pub fn code(&self) -> &[Effect<T, Variable>] {
-        &self.code
-    }
-
     pub fn known_variables(&self) -> &HashSet<Variable> {
         &self.known_variables
     }
@@ -126,11 +122,6 @@ impl<'a, T: FieldElement, FixedEval: FixedEvaluator<T>> WitgenInference<'a, T, F
         let rc = self.range_constraint(variable);
         assert!(rc.try_to_single_value().is_none());
 
-        log::trace!(
-            "Branching on variable {variable}, which has a range of {}",
-            rc.range_width()
-        );
-
         let (low_condition, high_condition) = rc.bisect();
 
         let common_code = std::mem::take(&mut self.code);
@@ -624,7 +615,7 @@ mod test {
             }
             assert!(counter < 10000, "Solving took more than 10000 rounds.");
         }
-        format_code(witgen.code())
+        format_code(&witgen.code())
     }
 
     #[test]
diff --git a/jit-compiler/src/compiler.rs b/jit-compiler/src/compiler.rs
index 24dc12e781..0234e49d10 100644
--- a/jit-compiler/src/compiler.rs
+++ b/jit-compiler/src/compiler.rs
@@ -1,6 +1,5 @@
 use mktemp::Temp;
 use std::{
-    env,
     fs::{self},
     process::Command,
     str::from_utf8,
@@ -200,21 +199,21 @@ pub struct PathInTempDir {
     pub path: String,
 }
 
-fn cargo_toml() -> String {
-    match env::var("POWDR_JIT_OPT_LEVEL") {
-        Ok(opt_level) => {
+fn cargo_toml(opt_level: Option<u32>) -> String {
+    match opt_level {
+        Some(opt_level) => {
             format!("{CARGO_TOML}\n\n[profile.release]\nopt-level = {opt_level}\n",)
         }
-        Err(_) => CARGO_TOML.to_string(),
+        None => CARGO_TOML.to_string(),
     }
 }
 
 /// Compiles the given code and returns the path to the
 /// temporary directory containing the compiled library
 /// and the path to the compiled library.
-pub fn call_cargo(code: &str) -> Result<PathInTempDir, String> {
+pub fn call_cargo(code: &str, opt_level: Option<u32>) -> Result<PathInTempDir, String> {
     let dir = mktemp::Temp::new_dir().unwrap();
-    fs::write(dir.join("Cargo.toml"), cargo_toml()).unwrap();
+    fs::write(dir.join("Cargo.toml"), cargo_toml(opt_level)).unwrap();
     fs::create_dir(dir.join("src")).unwrap();
     fs::write(dir.join("src").join("lib.rs"), code).unwrap();
     let output_asm = false;
diff --git a/jit-compiler/src/lib.rs b/jit-compiler/src/lib.rs
index 5cbc60c3a9..af63b999d2 100644
--- a/jit-compiler/src/lib.rs
+++ b/jit-compiler/src/lib.rs
@@ -85,7 +85,11 @@ pub fn compile<T: FieldElement>(
 
     let glue_code = generate_glue_code(&successful_symbols, analyzed)?;
 
-    let lib_file = call_cargo(&format!("{glue_code}\n{}\n", codegen.generated_code()))?;
+    let opt_level = None;
+    let lib_file = call_cargo(
+        &format!("{glue_code}\n{}\n", codegen.generated_code()),
+        opt_level,
+    )?;
     let metadata = fs::metadata(&lib_file.path).unwrap();
 
     log::info!(