Minimize computation in rotate_pixel().

- Deactivate tests.
- Check whether the source deltas are integer values.
master
Fabien Freling 2014-07-06 23:49:46 +02:00
parent a6ef15ea62
commit 67b4bcc3d3
1 changed file with 85 additions and 125 deletions

View File

@ -595,9 +595,12 @@ inline
void rotate_pixel(Image const& src, /*uint8_t* rotated,*/
DPoint const& src_rotated_point, /*Point const& rot_point,*/
unsigned int const src_limit, unsigned int const rot_limit,
uint8_t* rotate_buffer, unsigned int rot_index)
uint8_t* rotate_buffer, unsigned int rot_index,
bool full_delta)
{
unsigned int src_index = ((int) src_rotated_point.y * src.width + (int) src_rotated_point.x) * 3;
int const src_x = (int) src_rotated_point.x;
int const src_y = (int) src_rotated_point.y;
unsigned int src_index = (src_y * src.width + src_x) * 3;
// unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3;
// Out-of-bounds check
@ -614,46 +617,27 @@ void rotate_pixel(Image const& src, /*uint8_t* rotated,*/
if (src_index_4 >= src_limit)
return;
double x_delta = src_rotated_point.x - floor(src_rotated_point.x);
round_if_very_small(x_delta);
double y_delta = src_rotated_point.y - floor(src_rotated_point.y);
round_if_very_small(y_delta);
// special case if we can directly map the src to the dest
if (x_delta == 0 && y_delta == 0)
if (full_delta)
{
// memcpy(&rotated.buffer[rot_index], &src.buffer[src_index], 3 * sizeof (uint8_t));
rotate_buffer[rot_index] = src.buffer[src_index];
rotate_buffer[rot_index + 1] = src.buffer[src_index + 1];
rotate_buffer[rot_index + 2] = src.buffer[src_index + 2];
memcpy(&rotate_buffer[rot_index], &src.buffer[src_index], 3 * sizeof (uint8_t));
return;
}
// SIMD
__m128 const x_d = _mm_set_ps1(x_delta);
__m128 const inv_x_d = _mm_set_ps1(1 - x_delta);
__m128 top_left = _mm_set_ps(src.buffer[src_index_1], src.buffer[src_index_1 + 1], src.buffer[src_index_1 + 2], 0.0);
__m128 top_right = _mm_set_ps(src.buffer[src_index_2], src.buffer[src_index_2 + 1], src.buffer[src_index_2 + 2], 0.0);
top_left = _mm_mul_ps(top_left, inv_x_d);
top_right = _mm_mul_ps(top_right, x_d);
top_left = _mm_add_ps(top_left, top_right);
double x_delta = src_rotated_point.x - src_x;
//round_if_very_small(x_delta);
double y_delta = src_rotated_point.y - src_y;
//round_if_very_small(y_delta);
double const inv_x = 1 - x_delta;
double const inv_y = 1 - y_delta;
__m128 bottom_left = _mm_set_ps(src.buffer[src_index_3], src.buffer[src_index_3 + 1], src.buffer[src_index_3 + 2], 0.0);
__m128 bottom_right = _mm_set_ps(src.buffer[src_index_4], src.buffer[src_index_4 + 1], src.buffer[src_index_4 + 2], 0.0);
bottom_left = _mm_mul_ps(bottom_left, inv_x_d);
bottom_right = _mm_mul_ps(bottom_right, x_d);
bottom_left = _mm_add_ps(bottom_left, bottom_right);
__m128 const y_d = _mm_set_ps1(y_delta);
__m128 const inv_y_d = _mm_set_ps1(1 - y_delta);
top_left = _mm_mul_ps(top_left, inv_y_d);
bottom_left = _mm_mul_ps(bottom_left, y_d);
top_left = _mm_add_ps(top_left, bottom_left);
// convert float values to uint8_t
rotate_buffer[rot_index] = top_left[3];
rotate_buffer[rot_index + 1] = top_left[2];
rotate_buffer[rot_index + 2] = top_left[1];
// No SIMD
rotate_buffer[rot_index] = (src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y
+ (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta;
rotate_buffer[rot_index + 1] = (src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
+ (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta;
rotate_buffer[rot_index + 2] = (src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
+ (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta;
}
Image* rotate(Image const& src, double angle)
@ -689,6 +673,12 @@ Image* rotate(Image const& src, double angle)
src_delta_y.y = src_delta_y.y - src_origin.y;
round_if_very_small(src_delta_y.x);
round_if_very_small(src_delta_y.y);
bool full_delta = false;
if (src_delta_x.x - (int) src_delta_x.x == 0
&& src_delta_x.y - (int) src_delta_x.y == 0
&& src_delta_y.x - (int) src_delta_y.x == 0
&& src_delta_y.y - (int) src_delta_y.y == 0)
full_delta = true;
// // steps for first column in source image (y)
@ -705,57 +695,19 @@ Image* rotate(Image const& src, double angle)
unsigned int const src_limit = src.width * src.height * 3;
unsigned int const rot_limit = rotated->width * rotated->height * 3;
// for (int y_i = 0; y_i <= (int) origin_nb_steps; ++y_i)
// {
// // first column origin
// Point const rot_origin(tl.x + y_i * rotated_step.x, tl.y + y_i * rotated_step.y);
// Point rot_point(rot_origin.x, rot_origin.y);
// DPoint rot_delta(0.0, 0.0);
//
// Point previous = rot_origin;
//
// for (int x_i = 0; x_i <= (int) line_nb_steps; ++x_i)
// {
//
// Point const delta(rot_point.x - tl.x, rot_point.y - tl.y);
// DPoint src_rotated_point(src_tl.x + delta.x * src_delta_x.x + delta.y * src_delta_y.x,
// src_tl.y + delta.x * src_delta_x.y + delta.y * src_delta_y.y);
//
// rotate_pixel(src, *rotated, src_rotated_point, rot_point, src_limit, rot_limit);
//
// if (previous.x != rot_point.x && previous.y != rot_point.y)
// {
// int y_slope = rot_point.y > previous.y ? 1 : -1;
// int tmp_y = rot_point.y;
// rot_point.y = previous.y;
//
// src_rotated_point.x -= y_slope * src_delta_y.x;
// src_rotated_point.y -= y_slope * src_delta_y.y;
//
// rotate_pixel(src, *rotated, src_rotated_point, rot_point, src_limit, rot_limit);
//
// rot_point.y = tmp_y;
// }
//
// previous = rot_point;
//
// rot_delta.x += bresenham.x;
// rot_point.x = rot_origin.x + (int) rot_delta.x;
//
// rot_delta.y += bresenham.y;
// rot_point.y = rot_origin.y + (int) rot_delta.y;
// }
// }
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
unsigned int const buffer_size = rotated->width * 3;
unsigned int const buffer_pixel_capacity = 128 / 3;
unsigned int const buffer_size = buffer_pixel_capacity * 3;
unsigned int buffer_index = 0;
uint8_t buffer[buffer_size];
memset(buffer, 0, buffer_size);
unsigned int buffer_offset = 0;
for (unsigned int y = 0; y < rotated->height; ++y)
{
memset(buffer, 0, buffer_size);
//memset(buffer, 0, buffer_size);
DPoint src_rotated_point(rot_origin_in_src.x + y * src_delta_y.x,
rot_origin_in_src.y + y * src_delta_y.y);
@ -764,13 +716,22 @@ Image* rotate(Image const& src, double angle)
if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width
&& src_rotated_point.y >= 0 && src_rotated_point.y < src.height)
{
rotate_pixel(src, src_rotated_point, src_limit, rot_limit, buffer, x * 3);
rotate_pixel(src, src_rotated_point,
src_limit, rot_limit,
buffer, buffer_index * 3, full_delta);
}
src_rotated_point.x += src_delta_x.x;
src_rotated_point.y += src_delta_x.y;
++buffer_index;
if (buffer_index == buffer_pixel_capacity)
{
memcpy(rotated->buffer + buffer_offset, buffer, buffer_size);
buffer_offset += buffer_size;
buffer_index = 0;
memset(buffer, 0, buffer_size);
}
}
memcpy(rotated->buffer + buffer_size * y, buffer, buffer_size);
}
return rotated;
@ -784,55 +745,42 @@ Image* rotate(Image const& src, double angle)
template<unsigned int W, unsigned int H>
void rotate_pixel(TiledImage<W, H> const& src,
DPoint const& src_rotated_point,
uint8_t* rot_tile, unsigned int rot_index)
uint8_t* rot_tile, unsigned int rot_index,
bool full_delta)
{
uint8_t const* src_index_1 = src.access_pixel((int) src_rotated_point.x, (int) src_rotated_point.y);
double x_delta = src_rotated_point.x - (int) src_rotated_point.x;
round_if_very_small(x_delta);
double y_delta = src_rotated_point.y - (int) src_rotated_point.y;
round_if_very_small(y_delta);
int const src_x = (int) src_rotated_point.x;
int const src_y = (int) src_rotated_point.y;
uint8_t const* src_index_1 = src.access_pixel(src_x, src_y);
// special case if we can directly map the src to the dest
if (x_delta == 0 && y_delta == 0)
if (full_delta)
{
memcpy(&rot_tile[rot_index], src_index_1, 3 * sizeof (uint8_t));
return;
}
uint8_t const* src_index_2 = src.access_pixel((int) src_rotated_point.x + 1, (int) src_rotated_point.y);
uint8_t const* src_index_3 = src.access_pixel((int) src_rotated_point.x, (int) src_rotated_point.y + 1);
uint8_t const* src_index_4 = src.access_pixel((int) src_rotated_point.x + 1, (int) src_rotated_point.y + 1);
double x_delta = src_rotated_point.x - src_x;
//round_if_very_small(x_delta);
double y_delta = src_rotated_point.y - src_y;
//round_if_very_small(y_delta);
double const inv_x = 1 - x_delta;
double const inv_y = 1 - y_delta;
uint8_t const* src_index_2 = src.access_pixel(src_x + 1, src_y);
uint8_t const* src_index_3 = src.access_pixel(src_x, src_y + 1);
uint8_t const* src_index_4 = src.access_pixel(src_x + 1, src_y + 1);
// FIXME: deal with image border
if (!src_index_1 || !src_index_2 || !src_index_3 || !src_index_4)
if (!src_index_4)
return;
// SIMD
__m128 const x_d = _mm_set_ps1(x_delta);
__m128 const inv_x_d = _mm_set_ps1(1 - x_delta);
__m128 top_left = _mm_set_ps(*src_index_1, *(src_index_1 + 1), *(src_index_1 + 2), 0.0);
__m128 top_right = _mm_set_ps(*src_index_2, *(src_index_2 + 1), *(src_index_2 + 2), 0.0);
top_left = _mm_mul_ps(top_left, inv_x_d);
top_right = _mm_mul_ps(top_right, x_d);
top_left = _mm_add_ps(top_left, top_right);
__m128 bottom_left = _mm_set_ps(*src_index_3, *(src_index_3 + 1), *(src_index_3 + 2), 0.0);
__m128 bottom_right = _mm_set_ps(*src_index_4, *(src_index_4 + 1), *(src_index_4 + 2), 0.0);
bottom_left = _mm_mul_ps(bottom_left, inv_x_d);
bottom_right = _mm_mul_ps(bottom_right, x_d);
bottom_left = _mm_add_ps(bottom_left, bottom_right);
__m128 const y_d = _mm_set_ps1(y_delta);
__m128 const inv_y_d = _mm_set_ps1(1 - y_delta);
top_left = _mm_mul_ps(top_left, inv_y_d);
bottom_left = _mm_mul_ps(bottom_left, y_d);
top_left = _mm_add_ps(top_left, bottom_left);
// convert float values to uint8_t
rot_tile[rot_index] = top_left[3];
rot_tile[rot_index + 1] = top_left[2];
rot_tile[rot_index + 2] = top_left[1];
// No SIMD
rot_tile[rot_index] = (src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y
+ (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta;
rot_tile[rot_index + 1] = (src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y
+ (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta;
rot_tile[rot_index + 2] = (src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y
+ (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta;
}
template<unsigned int W, unsigned int H>
@ -857,15 +805,26 @@ rotate(TiledImage<W, H> const& src, double angle)
src_delta_y.y = src_delta_y.y - src_origin.y;
round_if_very_small(src_delta_y.x);
round_if_very_small(src_delta_y.y);
bool full_delta = false;
if (src_delta_x.x - (int) src_delta_x.x == 0
&& src_delta_x.y - (int) src_delta_x.y == 0
&& src_delta_y.x - (int) src_delta_y.x == 0
&& src_delta_y.y - (int) src_delta_y.y == 0)
full_delta = true;
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
uint8_t tile[W * H * 3];
memset(tile, 0, W * H * 3);
for (unsigned int y = 0; y < rotated->nb_row_tile; ++y)
{
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
{
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
unsigned int rot_index = 0;
for (unsigned int j = 0; j < H; ++j)
{
@ -876,20 +835,21 @@ rotate(TiledImage<W, H> const& src, double angle)
for (unsigned int i = 0; i < W; ++i)
{
unsigned int const rot_index = (j * W + i) * 3;
if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width
&& src_rotated_point.y >= 0 && src_rotated_point.y < src.height)
{
rotate_pixel(src, src_rotated_point,
rotated->tiles[rot_tile_index], rot_index);
tile, rot_index, full_delta);
}
src_rotated_point.x += src_delta_x.x;
src_rotated_point.y += src_delta_x.y;
rot_index += 3;
}
}
memcpy(rotated->tiles[rot_tile_index], tile, W * H * 3);
memset(tile, 0, W * H * 3);
}
}
@ -1059,7 +1019,7 @@ int main(int argc, char* argv[])
return 1;
}
bool perform_check = true;
bool perform_check = false;
if (perform_check)
{
@ -1077,7 +1037,7 @@ int main(int argc, char* argv[])
}
Image img(argv[1]);
TiledImage<32, 32> tiled_img(argv[1]);
TiledImage<8, 8> tiled_img(argv[1]);
for (double rotation = 0; rotation < 360; rotation += 45)
{