Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added simple tile size calculation #2

Open
wants to merge 12 commits into
base: impala
Choose a base branch
from
137 changes: 124 additions & 13 deletions mapping_cpu.impala
Original file line number Diff line number Diff line change
Expand Up @@ -53,27 +53,137 @@ fn @iteration_bounds(math: Intrinsics, out: Img, arr: Img, mask: Mask, bh_lower:
}
}

fn @tiled_loop(xl: i32, xu: i32, yl: i32, yu: i32, body: fn(i32, i32) -> ()) -> () {
fn @get_step(lvl: i32) -> i32 { // size -> 2048 -> 1
if lvl == 0 { 2048 } else { 1 }
// Cache hierarchy description consulted when deriving a tile size.
struct CacheSizes {
sizes : [i32 * 3], // one capacity per cache level; the instance built in get_tile_dims states these in bytes
count : i32, // number of levels; cache_tile starts at index count - 1 and walks down to index 0
}

fn @tiled_loop(xl: i32, xu: i32, yl: i32, yu: i32, mask: Mask, img: Img, body: fn(i32, i32) -> ()) -> () {
let debug_tiling = false;

// analyze stencil
// Scans the stencil mask in row-major order (x fastest) and measures the
// (x, y) offset between each non-zero entry and the previous non-zero entry.
//
//   idx      - mask position examined by this step
//   last_idx - position of the previous non-zero entry; (-1, -1) until one is found
//   sum_dist - running component-wise sum of all offsets seen so far
//   max_dist - largest offset seen so far, ordered by y first and x second
//
// Returns (sum_dist, max_dist) once the scan has moved past the last mask row.
fn @(?idx & ?mask) distances(idx: (i32, i32), last_idx: (i32, i32), sum_dist: (i32, i32), max_dist: (i32, i32)) -> ((i32, i32), (i32, i32)) {
    let col = idx(0);
    let row = idx(1);
    let right_neighbour = (col + 1, row);

    if col >= mask.size_x {
        // ran off the end of this mask row: continue at the start of the next one
        distances((0, row + 1), last_idx, sum_dist, max_dist)
    } else if row >= mask.size_y {
        // every row has been visited: hand back the accumulated result
        (sum_dist, max_dist)
    } else if mask.data(col, row) == 0.0f {
        // zero coefficients do not take part in the analysis
        distances(right_neighbour, last_idx, sum_dist, max_dist)
    } else if last_idx(0) == -1 {
        // first non-zero coefficient: remember it, there is no offset to measure yet
        distances(right_neighbour, idx, sum_dist, max_dist)
    } else {
        // offset from the previous non-zero coefficient to this one
        let dx = col - last_idx(0);
        let dy = row - last_idx(1);

        // a larger y offset wins; on equal y the larger x offset wins
        let exceeds_max = dy > max_dist(1) || (dy == max_dist(1) && dx > max_dist(0));
        let updated_max = if exceeds_max { (dx, dy) } else { max_dist };

        distances(right_neighbour, idx,
                  (sum_dist(0) + dx, sum_dist(1) + dy),
                  updated_max)
    }
}
let (sum_distances, max_distance) = distances((0,0), (-1,-1), (0,0), (0,0));

pe_info("sum", sum_distances);
pe_info("max", max_distance);

// Derives the tile dimensions (x, y) used to block the stencil loop.
//
// The x tile size is chosen per cache level, from the outermost level
// (index caches.count - 1) down to index 0, starting with img.width as the
// upper limit. The y tile size is always img.height (no blocking in y).
//
// The estimate uses the stencil analysis results captured from the enclosing
// scope (max_distance, sum_distances): a tile of width w is assumed to touch
// about  x_span + y_span * w  elements, where each span is the sum of the
// corresponding max and sum distance components.
//
//   mask - stencil mask (not read here; the analysis results are captured)
//   img  - image whose width/height bound the tile dimensions
fn @get_tile_dims(mask: Mask, img: Img) -> (i32, i32) {
    // TODO make this dimension independent, currently only 2D is supported
    // TODO get cache information from machine/compiler/somewhere else

    let caches = CacheSizes { sizes : [ 32*1024, 256*1024, 20*1024*1024 ], count : 3}; // in bytes
    let element_size = 4; // TODO make this dynamic based on used type. templates?

    // horizontal and vertical extent terms of the footprint estimate
    let x_span = max_distance(0) + sum_distances(0);
    let y_span = max_distance(1) + sum_distances(1);

    if debug_tiling {
        print_string(" (max, sum) = ((");
        print_i32(max_distance(0));
        print_string(", ");
        print_i32(max_distance(1));
        print_string("), (");
        print_i32(sum_distances(0));
        print_string(", ");
        print_i32(sum_distances(1));
        print_string(")) = ");
        print_i32(x_span + y_span * img.width);
        print_string("\n");
    }

    // Refines the tile size for one cache level and recurses to the next
    // smaller index. A proposal is only accepted when it is smaller than the
    // current tile size and still longer than min_loop_length.
    fn @(?cache_lvl) cache_tile(cache_lvl: i32, max_tile_size: i32) -> i32 {
        let min_loop_length = 200; // controls when loop should be blocked

        if cache_lvl < 0 {
            max_tile_size
        } else if y_span <= 0 {
            // All non-zero mask entries lie in one row (or there is at most
            // one): the footprint does not grow with the tile width, so no
            // cache level limits it. Returning here also avoids dividing by
            // zero in the proposal below.
            max_tile_size
        } else {
            // elements that fit into this level, minus the x extent, spread
            // over the y extent; keep 90% and round down to a multiple of 64
            let proposed_tile_size = (((caches.sizes(cache_lvl) / element_size
                                        - x_span) /
                                       y_span
                                       * 90 / 100)>>6)<<6;
            if proposed_tile_size < max_tile_size && proposed_tile_size > min_loop_length {
                cache_tile(cache_lvl - 1, proposed_tile_size)
            } else {
                cache_tile(cache_lvl - 1, max_tile_size)
            }
        }
    }
    let tile_size = cache_tile(caches.count - 1, img.width);

    if debug_tiling {
        pe_info("tile_size", tile_size);
        print_string("\nRecommended inner tiling: ");
        print_i32(tile_size);
        print_string("\n");
    }

    (tile_size, img.height)
}

let x_upper = xu;
let x_lower = xl;
let y_upper = yu;
let y_lower = yl;

fn @(?cur_lvl) tile(cur_lvl: i32, xl: i32, xu: i32, yl: i32, yu: i32) -> () {
let step = get_step(cur_lvl);
pe_info("step size", step);
if debug_tiling {
print_string("tile(");
print_i32(cur_lvl);
print_string(", ");
print_i32(xl);
print_string(", ");
print_i32(xu);
print_string(", ");
print_i32(yl);
print_string(", ");
print_i32(yu);
print_string(")\n");
}

if step == 1 {
for y in range(yl, yu) {
for x in range(xl, min(xu, x_upper)) {
@@body(x, y);
let (xtile_dim, ytile_dim) = get_tile_dims(mask, img);

pe_info("tile dim (x, y)", (xtile_dim, ytile_dim));
pe_info("x (lower, upper)", (xl, xu));
pe_info("y (lower, upper)", (yl, yu));

if cur_lvl == 0 {
// we always start from 0 to align tiles, no matter what boundary is skipped over
for x in range_step(0, xu, xtile_dim) {
for y in range_step(0, yu, ytile_dim) {
tile(cur_lvl + 1,
max(x, xl), min(x + xtile_dim, xu),
max(y, yl), min(y + ytile_dim, yu));
}
}
} else {
let step = if xl == x_lower { step - xl } else { step };
for x in range_step(xl, xu, step) {
tile(cur_lvl + 1, x, x + step, yl, yu);
for y in range(yl, yu) {
for x in range(xl, xu) {
@@body(x, y);
}
}
}
}
Expand All @@ -95,7 +205,7 @@ fn @iteration_advanced(math: Intrinsics, out: Img, arr: Img, mask: Mask, bh_lowe
// gets slower when computed in parallel

match region {
(Boundary::Center, Boundary::Center) => tiled_loop(bounds_row(0), bounds_row(1), bounds_col(0), bounds_col(1), @|x, y| body(math, x, y, out_acc, arr_acc, mask)),
(Boundary::Center, Boundary::Center) => tiled_loop(bounds_row(0), bounds_row(1), bounds_col(0), bounds_col(1), mask, arr, @|x, y| body(math, x, y, out_acc, arr_acc, mask)),
_ =>
for y in range(bounds_col(0), bounds_col(1)) {
for x in inner_loop(bounds_row(0), bounds_row(1)) {
Expand Down Expand Up @@ -211,6 +321,7 @@ fn @iteration_sep_advanced(math: Intrinsics, out: Img, arr: Img, mask_row: MaskS
}
}
}

release(tmp);
}
}
Expand Down