Minimize computation in rotate_pixel().

- Deactivate tests.
- Check whether the delta steps are integer-valued.
master
Fabien Freling 2014-07-06 23:49:46 +02:00
parent a6ef15ea62
commit 67b4bcc3d3
1 changed file with 85 additions and 125 deletions

View File

@ -595,9 +595,12 @@ inline
void rotate_pixel(Image const& src, /*uint8_t* rotated,*/ void rotate_pixel(Image const& src, /*uint8_t* rotated,*/
DPoint const& src_rotated_point, /*Point const& rot_point,*/ DPoint const& src_rotated_point, /*Point const& rot_point,*/
unsigned int const src_limit, unsigned int const rot_limit, unsigned int const src_limit, unsigned int const rot_limit,
uint8_t* rotate_buffer, unsigned int rot_index) uint8_t* rotate_buffer, unsigned int rot_index,
bool full_delta)
{ {
unsigned int src_index = ((int) src_rotated_point.y * src.width + (int) src_rotated_point.x) * 3; int const src_x = (int) src_rotated_point.x;
int const src_y = (int) src_rotated_point.y;
unsigned int src_index = (src_y * src.width + src_x) * 3;
// unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3; // unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3;
// Out-of-bounds check // Out-of-bounds check
@ -614,46 +617,27 @@ void rotate_pixel(Image const& src, /*uint8_t* rotated,*/
if (src_index_4 >= src_limit) if (src_index_4 >= src_limit)
return; return;
double x_delta = src_rotated_point.x - floor(src_rotated_point.x);
round_if_very_small(x_delta);
double y_delta = src_rotated_point.y - floor(src_rotated_point.y);
round_if_very_small(y_delta);
// special case if we can directly map the src to the dest // special case if we can directly map the src to the dest
if (x_delta == 0 && y_delta == 0) if (full_delta)
{ {
// memcpy(&rotated.buffer[rot_index], &src.buffer[src_index], 3 * sizeof (uint8_t)); memcpy(&rotate_buffer[rot_index], &src.buffer[src_index], 3 * sizeof (uint8_t));
rotate_buffer[rot_index] = src.buffer[src_index];
rotate_buffer[rot_index + 1] = src.buffer[src_index + 1];
rotate_buffer[rot_index + 2] = src.buffer[src_index + 2];
return; return;
} }
// SIMD double x_delta = src_rotated_point.x - src_x;
__m128 const x_d = _mm_set_ps1(x_delta); //round_if_very_small(x_delta);
__m128 const inv_x_d = _mm_set_ps1(1 - x_delta); double y_delta = src_rotated_point.y - src_y;
__m128 top_left = _mm_set_ps(src.buffer[src_index_1], src.buffer[src_index_1 + 1], src.buffer[src_index_1 + 2], 0.0); //round_if_very_small(y_delta);
__m128 top_right = _mm_set_ps(src.buffer[src_index_2], src.buffer[src_index_2 + 1], src.buffer[src_index_2 + 2], 0.0); double const inv_x = 1 - x_delta;
top_left = _mm_mul_ps(top_left, inv_x_d); double const inv_y = 1 - y_delta;
top_right = _mm_mul_ps(top_right, x_d);
top_left = _mm_add_ps(top_left, top_right);
__m128 bottom_left = _mm_set_ps(src.buffer[src_index_3], src.buffer[src_index_3 + 1], src.buffer[src_index_3 + 2], 0.0); // No SIMD
__m128 bottom_right = _mm_set_ps(src.buffer[src_index_4], src.buffer[src_index_4 + 1], src.buffer[src_index_4 + 2], 0.0); rotate_buffer[rot_index] = (src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y
bottom_left = _mm_mul_ps(bottom_left, inv_x_d); + (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta;
bottom_right = _mm_mul_ps(bottom_right, x_d); rotate_buffer[rot_index + 1] = (src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
bottom_left = _mm_add_ps(bottom_left, bottom_right); + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta;
rotate_buffer[rot_index + 2] = (src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
__m128 const y_d = _mm_set_ps1(y_delta); + (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta;
__m128 const inv_y_d = _mm_set_ps1(1 - y_delta);
top_left = _mm_mul_ps(top_left, inv_y_d);
bottom_left = _mm_mul_ps(bottom_left, y_d);
top_left = _mm_add_ps(top_left, bottom_left);
// convert float values to uint8_t
rotate_buffer[rot_index] = top_left[3];
rotate_buffer[rot_index + 1] = top_left[2];
rotate_buffer[rot_index + 2] = top_left[1];
} }
Image* rotate(Image const& src, double angle) Image* rotate(Image const& src, double angle)
@ -689,6 +673,12 @@ Image* rotate(Image const& src, double angle)
src_delta_y.y = src_delta_y.y - src_origin.y; src_delta_y.y = src_delta_y.y - src_origin.y;
round_if_very_small(src_delta_y.x); round_if_very_small(src_delta_y.x);
round_if_very_small(src_delta_y.y); round_if_very_small(src_delta_y.y);
bool full_delta = false;
if (src_delta_x.x - (int) src_delta_x.x == 0
&& src_delta_x.y - (int) src_delta_x.y == 0
&& src_delta_y.x - (int) src_delta_y.x == 0
&& src_delta_y.y - (int) src_delta_y.y == 0)
full_delta = true;
// // steps for first column in source image (y) // // steps for first column in source image (y)
@ -705,57 +695,19 @@ Image* rotate(Image const& src, double angle)
unsigned int const src_limit = src.width * src.height * 3; unsigned int const src_limit = src.width * src.height * 3;
unsigned int const rot_limit = rotated->width * rotated->height * 3; unsigned int const rot_limit = rotated->width * rotated->height * 3;
// for (int y_i = 0; y_i <= (int) origin_nb_steps; ++y_i)
// {
// // first column origin
// Point const rot_origin(tl.x + y_i * rotated_step.x, tl.y + y_i * rotated_step.y);
// Point rot_point(rot_origin.x, rot_origin.y);
// DPoint rot_delta(0.0, 0.0);
//
// Point previous = rot_origin;
//
// for (int x_i = 0; x_i <= (int) line_nb_steps; ++x_i)
// {
//
// Point const delta(rot_point.x - tl.x, rot_point.y - tl.y);
// DPoint src_rotated_point(src_tl.x + delta.x * src_delta_x.x + delta.y * src_delta_y.x,
// src_tl.y + delta.x * src_delta_x.y + delta.y * src_delta_y.y);
//
// rotate_pixel(src, *rotated, src_rotated_point, rot_point, src_limit, rot_limit);
//
// if (previous.x != rot_point.x && previous.y != rot_point.y)
// {
// int y_slope = rot_point.y > previous.y ? 1 : -1;
// int tmp_y = rot_point.y;
// rot_point.y = previous.y;
//
// src_rotated_point.x -= y_slope * src_delta_y.x;
// src_rotated_point.y -= y_slope * src_delta_y.y;
//
// rotate_pixel(src, *rotated, src_rotated_point, rot_point, src_limit, rot_limit);
//
// rot_point.y = tmp_y;
// }
//
// previous = rot_point;
//
// rot_delta.x += bresenham.x;
// rot_point.x = rot_origin.x + (int) rot_delta.x;
//
// rot_delta.y += bresenham.y;
// rot_point.y = rot_origin.y + (int) rot_delta.y;
// }
// }
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation); DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid); DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
unsigned int const buffer_size = rotated->width * 3; unsigned int const buffer_pixel_capacity = 128 / 3;
unsigned int const buffer_size = buffer_pixel_capacity * 3;
unsigned int buffer_index = 0;
uint8_t buffer[buffer_size]; uint8_t buffer[buffer_size];
memset(buffer, 0, buffer_size);
unsigned int buffer_offset = 0;
for (unsigned int y = 0; y < rotated->height; ++y) for (unsigned int y = 0; y < rotated->height; ++y)
{ {
memset(buffer, 0, buffer_size); //memset(buffer, 0, buffer_size);
DPoint src_rotated_point(rot_origin_in_src.x + y * src_delta_y.x, DPoint src_rotated_point(rot_origin_in_src.x + y * src_delta_y.x,
rot_origin_in_src.y + y * src_delta_y.y); rot_origin_in_src.y + y * src_delta_y.y);
@ -764,13 +716,22 @@ Image* rotate(Image const& src, double angle)
if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width
&& src_rotated_point.y >= 0 && src_rotated_point.y < src.height) && src_rotated_point.y >= 0 && src_rotated_point.y < src.height)
{ {
rotate_pixel(src, src_rotated_point, src_limit, rot_limit, buffer, x * 3); rotate_pixel(src, src_rotated_point,
src_limit, rot_limit,
buffer, buffer_index * 3, full_delta);
} }
src_rotated_point.x += src_delta_x.x; src_rotated_point.x += src_delta_x.x;
src_rotated_point.y += src_delta_x.y; src_rotated_point.y += src_delta_x.y;
++buffer_index;
if (buffer_index == buffer_pixel_capacity)
{
memcpy(rotated->buffer + buffer_offset, buffer, buffer_size);
buffer_offset += buffer_size;
buffer_index = 0;
memset(buffer, 0, buffer_size);
}
} }
memcpy(rotated->buffer + buffer_size * y, buffer, buffer_size);
} }
return rotated; return rotated;
@ -784,55 +745,42 @@ Image* rotate(Image const& src, double angle)
template<unsigned int W, unsigned int H> template<unsigned int W, unsigned int H>
void rotate_pixel(TiledImage<W, H> const& src, void rotate_pixel(TiledImage<W, H> const& src,
DPoint const& src_rotated_point, DPoint const& src_rotated_point,
uint8_t* rot_tile, unsigned int rot_index) uint8_t* rot_tile, unsigned int rot_index,
bool full_delta)
{ {
uint8_t const* src_index_1 = src.access_pixel((int) src_rotated_point.x, (int) src_rotated_point.y); int const src_x = (int) src_rotated_point.x;
int const src_y = (int) src_rotated_point.y;
double x_delta = src_rotated_point.x - (int) src_rotated_point.x; uint8_t const* src_index_1 = src.access_pixel(src_x, src_y);
round_if_very_small(x_delta);
double y_delta = src_rotated_point.y - (int) src_rotated_point.y;
round_if_very_small(y_delta);
// special case if we can directly map the src to the dest // special case if we can directly map the src to the dest
if (x_delta == 0 && y_delta == 0) if (full_delta)
{ {
memcpy(&rot_tile[rot_index], src_index_1, 3 * sizeof (uint8_t)); memcpy(&rot_tile[rot_index], src_index_1, 3 * sizeof (uint8_t));
return; return;
} }
uint8_t const* src_index_2 = src.access_pixel((int) src_rotated_point.x + 1, (int) src_rotated_point.y); double x_delta = src_rotated_point.x - src_x;
uint8_t const* src_index_3 = src.access_pixel((int) src_rotated_point.x, (int) src_rotated_point.y + 1); //round_if_very_small(x_delta);
uint8_t const* src_index_4 = src.access_pixel((int) src_rotated_point.x + 1, (int) src_rotated_point.y + 1); double y_delta = src_rotated_point.y - src_y;
//round_if_very_small(y_delta);
double const inv_x = 1 - x_delta;
double const inv_y = 1 - y_delta;
uint8_t const* src_index_2 = src.access_pixel(src_x + 1, src_y);
uint8_t const* src_index_3 = src.access_pixel(src_x, src_y + 1);
uint8_t const* src_index_4 = src.access_pixel(src_x + 1, src_y + 1);
// FIXME: deal with image border // FIXME: deal with image border
if (!src_index_1 || !src_index_2 || !src_index_3 || !src_index_4) if (!src_index_4)
return; return;
// SIMD // No SIMD
__m128 const x_d = _mm_set_ps1(x_delta); rot_tile[rot_index] = (src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y
__m128 const inv_x_d = _mm_set_ps1(1 - x_delta); + (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta;
__m128 top_left = _mm_set_ps(*src_index_1, *(src_index_1 + 1), *(src_index_1 + 2), 0.0); rot_tile[rot_index + 1] = (src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y
__m128 top_right = _mm_set_ps(*src_index_2, *(src_index_2 + 1), *(src_index_2 + 2), 0.0); + (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta;
top_left = _mm_mul_ps(top_left, inv_x_d); rot_tile[rot_index + 2] = (src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y
top_right = _mm_mul_ps(top_right, x_d); + (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta;
top_left = _mm_add_ps(top_left, top_right);
__m128 bottom_left = _mm_set_ps(*src_index_3, *(src_index_3 + 1), *(src_index_3 + 2), 0.0);
__m128 bottom_right = _mm_set_ps(*src_index_4, *(src_index_4 + 1), *(src_index_4 + 2), 0.0);
bottom_left = _mm_mul_ps(bottom_left, inv_x_d);
bottom_right = _mm_mul_ps(bottom_right, x_d);
bottom_left = _mm_add_ps(bottom_left, bottom_right);
__m128 const y_d = _mm_set_ps1(y_delta);
__m128 const inv_y_d = _mm_set_ps1(1 - y_delta);
top_left = _mm_mul_ps(top_left, inv_y_d);
bottom_left = _mm_mul_ps(bottom_left, y_d);
top_left = _mm_add_ps(top_left, bottom_left);
// convert float values to uint8_t
rot_tile[rot_index] = top_left[3];
rot_tile[rot_index + 1] = top_left[2];
rot_tile[rot_index + 2] = top_left[1];
} }
template<unsigned int W, unsigned int H> template<unsigned int W, unsigned int H>
@ -857,15 +805,26 @@ rotate(TiledImage<W, H> const& src, double angle)
src_delta_y.y = src_delta_y.y - src_origin.y; src_delta_y.y = src_delta_y.y - src_origin.y;
round_if_very_small(src_delta_y.x); round_if_very_small(src_delta_y.x);
round_if_very_small(src_delta_y.y); round_if_very_small(src_delta_y.y);
bool full_delta = false;
if (src_delta_x.x - (int) src_delta_x.x == 0
&& src_delta_x.y - (int) src_delta_x.y == 0
&& src_delta_y.x - (int) src_delta_y.x == 0
&& src_delta_y.y - (int) src_delta_y.y == 0)
full_delta = true;
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation); DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid); DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
uint8_t tile[W * H * 3];
memset(tile, 0, W * H * 3);
for (unsigned int y = 0; y < rotated->nb_row_tile; ++y) for (unsigned int y = 0; y < rotated->nb_row_tile; ++y)
{ {
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x) for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
{ {
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x; unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
unsigned int rot_index = 0;
for (unsigned int j = 0; j < H; ++j) for (unsigned int j = 0; j < H; ++j)
{ {
@ -876,20 +835,21 @@ rotate(TiledImage<W, H> const& src, double angle)
for (unsigned int i = 0; i < W; ++i) for (unsigned int i = 0; i < W; ++i)
{ {
unsigned int const rot_index = (j * W + i) * 3;
if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width
&& src_rotated_point.y >= 0 && src_rotated_point.y < src.height) && src_rotated_point.y >= 0 && src_rotated_point.y < src.height)
{ {
rotate_pixel(src, src_rotated_point, rotate_pixel(src, src_rotated_point,
rotated->tiles[rot_tile_index], rot_index); tile, rot_index, full_delta);
} }
src_rotated_point.x += src_delta_x.x; src_rotated_point.x += src_delta_x.x;
src_rotated_point.y += src_delta_x.y; src_rotated_point.y += src_delta_x.y;
rot_index += 3;
} }
} }
memcpy(rotated->tiles[rot_tile_index], tile, W * H * 3);
memset(tile, 0, W * H * 3);
} }
} }
@ -1059,7 +1019,7 @@ int main(int argc, char* argv[])
return 1; return 1;
} }
bool perform_check = true; bool perform_check = false;
if (perform_check) if (perform_check)
{ {
@ -1077,7 +1037,7 @@ int main(int argc, char* argv[])
} }
Image img(argv[1]); Image img(argv[1]);
TiledImage<32, 32> tiled_img(argv[1]); TiledImage<8, 8> tiled_img(argv[1]);
for (double rotation = 0; rotation < 360; rotation += 45) for (double rotation = 0; rotation < 360; rotation += 45)
{ {