Switch to using Wasmtime-style builtins for ceil, floor, etc.
And reinstate the old stack limit checks for when signal handling is
disabled.
sunfishcode committed Dec 20, 2024 · 1 parent f034e72 · commit f160de6
Showing 403 changed files with 1,567 additions and 4,199 deletions.
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/aarch64/mod.rs
@@ -218,6 +218,10 @@ impl TargetIsa for AArch64Backend {
         true
     }
 
+    fn has_round(&self) -> bool {
+        true
+    }
+
     fn has_x86_blendv_lowering(&self, _: Type) -> bool {
         false
     }
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/mod.rs
@@ -380,6 +380,9 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
     /// not detected.
     fn has_native_fma(&self) -> bool;
 
+    /// Returns whether this ISA has instructions for `ceil`, `floor`, etc.
+    fn has_round(&self) -> bool;
+
     /// Returns whether the CLIF `x86_blendv` instruction is implemented for
     /// this ISA for the specified type.
     fn has_x86_blendv_lowering(&self, ty: Type) -> bool;
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/mod.rs
@@ -219,6 +219,10 @@ where
         false
     }
 
+    fn has_round(&self) -> bool {
+        false
+    }
+
     fn has_x86_blendv_lowering(&self, _ty: ir::Type) -> bool {
         false
     }
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/riscv64/mod.rs
@@ -195,6 +195,10 @@ impl TargetIsa for Riscv64Backend {
         true
     }
 
+    fn has_round(&self) -> bool {
+        true
+    }
+
     fn has_x86_blendv_lowering(&self, _: Type) -> bool {
         false
     }
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/s390x/mod.rs
@@ -178,6 +178,10 @@ impl TargetIsa for S390xBackend {
         true
     }
 
+    fn has_round(&self) -> bool {
+        true
+    }
+
     fn has_x86_blendv_lowering(&self, _: Type) -> bool {
         false
     }
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/x64/mod.rs
@@ -162,6 +162,10 @@ impl TargetIsa for X64Backend {
         self.x64_flags.use_fma()
     }
 
+    fn has_round(&self) -> bool {
+        self.x64_flags.use_sse41()
+    }
+
     fn has_x86_blendv_lowering(&self, ty: Type) -> bool {
         // The `blendvpd`, `blendvps`, and `pblendvb` instructions are all only
         // available from SSE 4.1 and onwards. Otherwise the i16x8 type has no
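The x64 backend is the only one above where native rounding is conditional: the x86 rounding instructions (`roundss`, `roundsd`, `roundps`, `roundpd`) only exist from SSE 4.1 onward, so `has_round()` simply reports the `use_sse41` flag. A hedged sketch of how one might construct an x64 target with SSE 4.1 disabled to exercise the builtin fallback (`has_sse41` is Cranelift's x64 setting name; treat the whole snippet as illustrative rather than as test code from this commit):

    use cranelift_codegen::isa;
    use cranelift_codegen::settings::{self, Configurable};

    fn main() {
        // Sketch: an x86_64 target with SSE 4.1 turned off, so `has_round()`
        // reports false and ceil/floor/trunc/nearest lower to builtins.
        let mut isa_builder = isa::lookup_by_name("x86_64").unwrap();
        isa_builder.set("has_sse41", "false").unwrap();
        let flags = settings::Flags::new(settings::builder());
        let isa = isa_builder.finish(flags).unwrap();
        assert!(!isa.has_round());
    }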
121 changes: 91 additions & 30 deletions crates/cranelift/src/compiler.rs
@@ -209,6 +209,58 @@ impl wasmtime_environ::Compiler for Compiler {
 
         let mut func_env = FuncEnvironment::new(self, translation, types, wasm_func_ty);
 
+        if !self.tunables.signals_based_traps {
+            // The `stack_limit` global value below is the implementation of stack
+            // overflow checks in Wasmtime.
+            //
+            // The Wasm spec defines that stack overflows will raise a trap, and
+            // there's an added constraint that, as an embedder, you are frequently
+            // running host-provided code called from wasm. WebAssembly and
+            // native code currently share the same call stack, so Wasmtime needs
+            // to make sure that host-provided code will have enough call stack
+            // available to it.
+            //
+            // The way that stack overflow is handled here is by adding a prologue
+            // check to all functions for how much native stack is remaining. The
+            // `VMContext` pointer is the first argument to all functions, the
+            // first field of this structure is `*const VMRuntimeLimits`, and the
+            // first field of that is the stack limit. Note that the stack limit in
+            // this case means "if the stack pointer goes below this, trap". Each
+            // function which consumes stack space or isn't a leaf function starts
+            // off by loading the stack limit, checking it against the stack
+            // pointer, and optionally trapping.
+            //
+            // This manual check allows the embedder to give wasm a relatively
+            // precise amount of stack allocation. Using this scheme we reserve a
+            // chunk of stack for wasm code relative to where wasm code was
+            // called. This ensures that native code called by wasm should have
+            // native stack space to run, and the amounts of stack space here
+            // should all be configurable for various embeddings.
+            //
+            // Note that this check is independent of each thread's stack guard
+            // page. If the stack guard page is reached, that's still considered an
+            // abort for the whole program, since the runtime limits configured by
+            // the embedder should cause wasm to trap before it reaches that
+            // (ensuring the host has enough space as well for its functionality).
+            if !isa.triple().is_pulley() {
+                let vmctx = context
+                    .func
+                    .create_global_value(ir::GlobalValueData::VMContext);
+                let interrupts_ptr = context.func.create_global_value(ir::GlobalValueData::Load {
+                    base: vmctx,
+                    offset: i32::from(func_env.offsets.ptr.vmctx_runtime_limits()).into(),
+                    global_type: isa.pointer_type(),
+                    flags: MemFlags::trusted().with_readonly(),
+                });
+                let stack_limit = context.func.create_global_value(ir::GlobalValueData::Load {
+                    base: interrupts_ptr,
+                    offset: i32::from(func_env.offsets.ptr.vmruntime_limits_stack_limit()).into(),
+                    global_type: isa.pointer_type(),
+                    flags: MemFlags::trusted(),
+                });
+                func_env.stack_limit_at_function_entry = Some(stack_limit);
+            }
+        }
         let FunctionBodyData { validator, body } = input;
         let mut validator =
             validator.into_validator(mem::take(&mut compiler.cx.validator_allocations));
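The two chained `GlobalValueData::Load`s above amount to a double pointer dereference at function entry. A rough sketch of what the emitted prologue effectively computes when `stack_limit_at_function_entry` is set (the function and parameter names here are illustrative stand-ins, not actual Wasmtime code):

    /// Illustrative only: the comparison the generated prologue performs.
    /// Returns true if the function should trap with STACK_OVERFLOW.
    unsafe fn would_overflow(vmctx: *const u8, runtime_limits_offset: usize, sp: usize) -> bool {
        // First load: vmctx -> *const VMRuntimeLimits (trusted, readonly).
        let limits = *(vmctx.add(runtime_limits_offset) as *const *const usize);
        // Second load: the stack limit is the first field of VMRuntimeLimits.
        let stack_limit = *limits;
        // "If the stack pointer goes below this, trap."
        sp < stack_limit
    }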
@@ -346,7 +398,7 @@ impl wasmtime_environ::Compiler for Compiler {
             caller_vmctx,
             i32::from(ptr.vmcontext_runtime_limits()),
         );
-        save_last_wasm_exit_fp_and_pc(&mut builder, pointer_type, &ptr, limits);
+        save_last_wasm_exit_fp_and_pc(&mut builder, pointer_type, &ptr, limits, &self.tunables);
 
         // Spill all wasm arguments to the stack in `ValRaw` slots.
         let (args_base, args_len) =
@@ -543,7 +595,13 @@ impl wasmtime_environ::Compiler for Compiler {
             vmctx,
             ptr_size.vmcontext_runtime_limits(),
         );
-        save_last_wasm_exit_fp_and_pc(&mut builder, pointer_type, &ptr_size, limits);
+        save_last_wasm_exit_fp_and_pc(
+            &mut builder,
+            pointer_type,
+            &ptr_size,
+            limits,
+            &self.tunables,
+        );
 
         // Now it's time to delegate to the actual builtin. Forward all our own
         // arguments to the libcall itself.
@@ -1107,35 +1165,38 @@ fn save_last_wasm_exit_fp_and_pc(
     pointer_type: ir::Type,
     ptr: &impl PtrSize,
     limits: Value,
+    tunables: &Tunables,
 ) {
-    // The Wasm spec defines that stack overflows will raise a trap, and
-    // there's an added constraint that, as an embedder, you are frequently
-    // running host-provided code called from wasm. WebAssembly and native code
-    // currently share the same call stack, so Wasmtime needs to make sure that
-    // host-provided code will have enough call stack available to it.
-    //
-    // The first field of `VMRuntimeLimits` is the stack limit. If the stack
-    // pointer is below this limit when we're about to call out of guest code,
-    // trap. But we don't check this limit as long as we stay within guest or
-    // trampoline code. Instead, we rely on the guest hitting a guard page,
-    // which the OS will tell our signal handler about. The following explicit
-    // check on guest exit ensures that native code called by wasm should have
-    // enough stack space to run without hitting a guard page.
-    let trampoline_sp = builder.ins().get_stack_pointer(pointer_type);
-    let stack_limit = builder.ins().load(
-        pointer_type,
-        MemFlags::trusted(),
-        limits,
-        ptr.vmruntime_limits_stack_limit(),
-    );
-    let is_overflow = builder.ins().icmp(
-        ir::condcodes::IntCC::UnsignedLessThan,
-        trampoline_sp,
-        stack_limit,
-    );
-    builder
-        .ins()
-        .trapnz(is_overflow, ir::TrapCode::STACK_OVERFLOW);
+    if tunables.signals_based_traps {
+        // The Wasm spec defines that stack overflows will raise a trap, and
+        // there's an added constraint that, as an embedder, you are frequently
+        // running host-provided code called from wasm. WebAssembly and native
+        // code currently share the same call stack, so Wasmtime needs to make
+        // sure that host-provided code will have enough call stack available
+        // to it.
+        //
+        // The first field of `VMRuntimeLimits` is the stack limit. If the
+        // stack pointer is below this limit when we're about to call out of
+        // guest code, trap. But we don't check this limit as long as we stay
+        // within guest or trampoline code. Instead, we rely on the guest
+        // hitting a guard page, which the OS will tell our signal handler
+        // about. The following explicit check on guest exit ensures that
+        // native code called by wasm should have enough stack space to run
+        // without hitting a guard page.
+        let trampoline_sp = builder.ins().get_stack_pointer(pointer_type);
+        let stack_limit = builder.ins().load(
+            pointer_type,
+            MemFlags::trusted(),
+            limits,
+            ptr.vmruntime_limits_stack_limit(),
+        );
+        let is_overflow = builder.ins().icmp(
+            ir::condcodes::IntCC::UnsignedLessThan,
+            trampoline_sp,
+            stack_limit,
+        );
+        builder
+            .ins()
+            .trapnz(is_overflow, ir::TrapCode::STACK_OVERFLOW);
+    }
 
     // Save the exit Wasm FP to the limits. We dereference the current FP to get
     // the previous FP because the current FP is the trampoline's FP, and we
1 change: 1 addition & 0 deletions crates/cranelift/src/compiler/component.rs
@@ -740,6 +740,7 @@ impl ComponentCompiler for Compiler {
                 pointer_type,
                 &c.offsets.ptr,
                 limits,
+                &self.tunables,
             );
         }
 
88 changes: 88 additions & 0 deletions crates/cranelift/src/func_environ.rs
@@ -3255,6 +3255,94 @@ impl FuncEnvironment<'_> {
         let _ = (builder, num_pages, mem_index);
     }
 
+    pub fn ceil_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
+        if self.isa.has_round() {
+            builder.ins().ceil(value)
+        } else {
+            let ceil = self.builtin_functions.ceil_f32(builder.func);
+            let vmctx = self.vmctx_val(&mut builder.cursor());
+            let call = builder.ins().call(ceil, &[vmctx, value]);
+            *builder.func.dfg.inst_results(call).first().unwrap()
+        }
+    }
+
+    pub fn ceil_f64(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
+        if self.isa.has_round() {
+            builder.ins().ceil(value)
+        } else {
+            let ceil = self.builtin_functions.ceil_f64(builder.func);
+            let vmctx = self.vmctx_val(&mut builder.cursor());
+            let call = builder.ins().call(ceil, &[vmctx, value]);
+            *builder.func.dfg.inst_results(call).first().unwrap()
+        }
+    }
+
+    pub fn floor_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
+        if self.isa.has_round() {
+            builder.ins().floor(value)
+        } else {
+            let floor = self.builtin_functions.floor_f32(builder.func);
+            let vmctx = self.vmctx_val(&mut builder.cursor());
+            let call = builder.ins().call(floor, &[vmctx, value]);
+            *builder.func.dfg.inst_results(call).first().unwrap()
+        }
+    }
+
+    pub fn floor_f64(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
+        if self.isa.has_round() {
+            builder.ins().floor(value)
+        } else {
+            let floor = self.builtin_functions.floor_f64(builder.func);
+            let vmctx = self.vmctx_val(&mut builder.cursor());
+            let call = builder.ins().call(floor, &[vmctx, value]);
+            *builder.func.dfg.inst_results(call).first().unwrap()
+        }
+    }
+
+    pub fn trunc_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
+        if self.isa.has_round() {
+            builder.ins().trunc(value)
+        } else {
+            let trunc = self.builtin_functions.trunc_f32(builder.func);
+            let vmctx = self.vmctx_val(&mut builder.cursor());
+            let call = builder.ins().call(trunc, &[vmctx, value]);
+            *builder.func.dfg.inst_results(call).first().unwrap()
+        }
+    }
+
+    pub fn trunc_f64(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
+        if self.isa.has_round() {
+            builder.ins().trunc(value)
+        } else {
+            let trunc = self.builtin_functions.trunc_f64(builder.func);
+            let vmctx = self.vmctx_val(&mut builder.cursor());
+            let call = builder.ins().call(trunc, &[vmctx, value]);
+            *builder.func.dfg.inst_results(call).first().unwrap()
+        }
+    }
+
+    pub fn nearest_f32(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
+        if self.isa.has_round() {
+            builder.ins().nearest(value)
+        } else {
+            let nearest = self.builtin_functions.nearest_f32(builder.func);
+            let vmctx = self.vmctx_val(&mut builder.cursor());
+            let call = builder.ins().call(nearest, &[vmctx, value]);
+            *builder.func.dfg.inst_results(call).first().unwrap()
+        }
+    }
+
+    pub fn nearest_f64(&mut self, builder: &mut FunctionBuilder, value: ir::Value) -> ir::Value {
+        if self.isa.has_round() {
+            builder.ins().nearest(value)
+        } else {
+            let nearest = self.builtin_functions.nearest_f64(builder.func);
+            let vmctx = self.vmctx_val(&mut builder.cursor());
+            let call = builder.ins().call(nearest, &[vmctx, value]);
+            *builder.func.dfg.inst_results(call).first().unwrap()
+        }
+    }
+
     pub fn isa(&self) -> &dyn TargetIsa {
         &*self.isa
     }
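All eight helpers above share one shape: emit the CLIF instruction when the ISA rounds natively, otherwise call the corresponding Wasmtime builtin with `vmctx` prepended to the arguments. A condensed sketch of that shared pattern, written as if it lived alongside the helpers in func_environ.rs (the `round_or_builtin` helper is hypothetical; the real change simply repeats the pattern per operation):

    // Hypothetical refactoring of the repeated pattern above. `inline_op`
    // builds e.g. `builder.ins().ceil(value)`, and `builtin` is the FuncRef
    // obtained from e.g. `self.builtin_functions.ceil_f32(builder.func)`.
    fn round_or_builtin(
        env: &mut FuncEnvironment<'_>,
        builder: &mut FunctionBuilder,
        value: ir::Value,
        inline_op: impl FnOnce(&mut FunctionBuilder, ir::Value) -> ir::Value,
        builtin: ir::FuncRef,
    ) -> ir::Value {
        if env.isa().has_round() {
            // Native rounding instruction available: stay inline.
            inline_op(builder, value)
        } else {
            // Fall back to the Wasmtime builtin; builtins take vmctx first.
            let vmctx = env.vmctx_val(&mut builder.cursor());
            let call = builder.ins().call(builtin, &[vmctx, value]);
            builder.func.dfg.first_result(call)
        }
    }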
12 changes: 12 additions & 0 deletions crates/cranelift/src/lib.rs
@@ -379,6 +379,12 @@ impl BuiltinFunctionSignatures {
         AbiParam::new(ir::types::I64)
     }
 
+    fn f32(&self) -> AbiParam {
+        AbiParam::new(ir::types::F32)
+    }
+
+    fn f64(&self) -> AbiParam {
+        AbiParam::new(ir::types::F64)
+    }
+
+    fn __m128i(&self) -> AbiParam {
+        AbiParam::new(ir::types::I8X16)
+    }
+
     fn u8(&self) -> AbiParam {
         AbiParam::new(ir::types::I8)
     }
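These helpers describe the native ABI types of builtin parameters: the new `f32`/`f64` entries serve the rounding builtins, and `__m128i` (CLIF `i8x16`) is presumably there for vector-typed builtins. Roughly, the signature composed for a builtin such as `ceil_f32` is `fn(vmctx, f32) -> f32`. A hedged CLIF-level sketch of such a signature (the real signatures in Wasmtime are macro-generated; this is just the equivalent construction by hand):

    use cranelift_codegen::ir::{self, AbiParam, ArgumentPurpose};
    use cranelift_codegen::isa::CallConv;

    // Sketch only: the signature a `ceil_f32` builtin would carry, assuming
    // the builtin receives vmctx plus the operand and returns the result.
    fn ceil_f32_signature(pointer_type: ir::Type, call_conv: CallConv) -> ir::Signature {
        let mut sig = ir::Signature::new(call_conv);
        // Every builtin receives the caller's vmctx as its first argument.
        sig.params.push(AbiParam::special(pointer_type, ArgumentPurpose::VMContext));
        // The f32 operand to round.
        sig.params.push(AbiParam::new(ir::types::F32));
        // The rounded result.
        sig.returns.push(AbiParam::new(ir::types::F32));
        sig
    }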
32 changes: 24 additions & 8 deletions crates/cranelift/src/translate/code_translator.rs
@@ -978,21 +978,37 @@ pub fn translate_operator(
             let arg = state.pop1();
             state.push1(builder.ins().sqrt(arg));
         }
-        Operator::F32Ceil | Operator::F64Ceil => {
+        Operator::F32Ceil => {
             let arg = state.pop1();
-            state.push1(builder.ins().ceil(arg));
+            state.push1(environ.ceil_f32(builder, arg));
+        }
+        Operator::F64Ceil => {
+            let arg = state.pop1();
+            state.push1(environ.ceil_f64(builder, arg));
         }
-        Operator::F32Floor | Operator::F64Floor => {
+        Operator::F32Floor => {
             let arg = state.pop1();
-            state.push1(builder.ins().floor(arg));
+            state.push1(environ.floor_f32(builder, arg));
+        }
+        Operator::F64Floor => {
+            let arg = state.pop1();
+            state.push1(environ.floor_f64(builder, arg));
         }
-        Operator::F32Trunc | Operator::F64Trunc => {
+        Operator::F32Trunc => {
            let arg = state.pop1();
-            state.push1(builder.ins().trunc(arg));
+            state.push1(environ.trunc_f32(builder, arg));
+        }
+        Operator::F64Trunc => {
+            let arg = state.pop1();
+            state.push1(environ.trunc_f64(builder, arg));
         }
-        Operator::F32Nearest | Operator::F64Nearest => {
+        Operator::F32Nearest => {
            let arg = state.pop1();
-            state.push1(builder.ins().nearest(arg));
+            state.push1(environ.nearest_f32(builder, arg));
+        }
+        Operator::F64Nearest => {
+            let arg = state.pop1();
+            state.push1(environ.nearest_f64(builder, arg));
         }
         Operator::F32Abs | Operator::F64Abs => {
             let val = state.pop1();
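The previously combined operator arms are split per type because the helper chosen now depends on the operand width as well as the ISA; the observable semantics are unchanged either way. One way to convince yourself (a hypothetical harness; `run_wasm_f32_ceil` is not a real function in this commit) is to check the result against Rust's own rounding, which matches wasm's semantics up to NaN bit patterns:

    // Hypothetical check: wasm `f32.ceil` must agree with f32::ceil whether
    // it was lowered to a native instruction or to the ceil_f32 builtin.
    fn check_ceil(run_wasm_f32_ceil: impl Fn(f32) -> f32) {
        for x in [1.5f32, -1.5, -0.25, 0.0, -0.0, f32::NAN, f32::INFINITY] {
            let expected = x.ceil();
            let actual = run_wasm_f32_ceil(x);
            // Compare NaNs by NaN-ness; wasm only guarantees a canonical NaN.
            assert!(actual == expected || (actual.is_nan() && expected.is_nan()));
        }
    }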
