Minimize computation in rotate_pixel().
- Deactivate tests.
- Check whether the step deltas are integer-valued.
This commit is contained in:
parent
a6ef15ea62
commit
67b4bcc3d3
210
rotation.cpp
210
rotation.cpp
|
@ -595,9 +595,12 @@ inline
|
||||||
void rotate_pixel(Image const& src, /*uint8_t* rotated,*/
|
void rotate_pixel(Image const& src, /*uint8_t* rotated,*/
|
||||||
DPoint const& src_rotated_point, /*Point const& rot_point,*/
|
DPoint const& src_rotated_point, /*Point const& rot_point,*/
|
||||||
unsigned int const src_limit, unsigned int const rot_limit,
|
unsigned int const src_limit, unsigned int const rot_limit,
|
||||||
uint8_t* rotate_buffer, unsigned int rot_index)
|
uint8_t* rotate_buffer, unsigned int rot_index,
|
||||||
|
bool full_delta)
|
||||||
{
|
{
|
||||||
unsigned int src_index = ((int) src_rotated_point.y * src.width + (int) src_rotated_point.x) * 3;
|
int const src_x = (int) src_rotated_point.x;
|
||||||
|
int const src_y = (int) src_rotated_point.y;
|
||||||
|
unsigned int src_index = (src_y * src.width + src_x) * 3;
|
||||||
// unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3;
|
// unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3;
|
||||||
|
|
||||||
// Out-of-bounds check
|
// Out-of-bounds check
|
||||||
|
@ -614,46 +617,27 @@ void rotate_pixel(Image const& src, /*uint8_t* rotated,*/
|
||||||
if (src_index_4 >= src_limit)
|
if (src_index_4 >= src_limit)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
double x_delta = src_rotated_point.x - floor(src_rotated_point.x);
|
|
||||||
round_if_very_small(x_delta);
|
|
||||||
double y_delta = src_rotated_point.y - floor(src_rotated_point.y);
|
|
||||||
round_if_very_small(y_delta);
|
|
||||||
|
|
||||||
// special case if we can directly map the src to the dest
|
// special case if we can directly map the src to the dest
|
||||||
if (x_delta == 0 && y_delta == 0)
|
if (full_delta)
|
||||||
{
|
{
|
||||||
// memcpy(&rotated.buffer[rot_index], &src.buffer[src_index], 3 * sizeof (uint8_t));
|
memcpy(&rotate_buffer[rot_index], &src.buffer[src_index], 3 * sizeof (uint8_t));
|
||||||
rotate_buffer[rot_index] = src.buffer[src_index];
|
|
||||||
rotate_buffer[rot_index + 1] = src.buffer[src_index + 1];
|
|
||||||
rotate_buffer[rot_index + 2] = src.buffer[src_index + 2];
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// SIMD
|
double x_delta = src_rotated_point.x - src_x;
|
||||||
__m128 const x_d = _mm_set_ps1(x_delta);
|
//round_if_very_small(x_delta);
|
||||||
__m128 const inv_x_d = _mm_set_ps1(1 - x_delta);
|
double y_delta = src_rotated_point.y - src_y;
|
||||||
__m128 top_left = _mm_set_ps(src.buffer[src_index_1], src.buffer[src_index_1 + 1], src.buffer[src_index_1 + 2], 0.0);
|
//round_if_very_small(y_delta);
|
||||||
__m128 top_right = _mm_set_ps(src.buffer[src_index_2], src.buffer[src_index_2 + 1], src.buffer[src_index_2 + 2], 0.0);
|
double const inv_x = 1 - x_delta;
|
||||||
top_left = _mm_mul_ps(top_left, inv_x_d);
|
double const inv_y = 1 - y_delta;
|
||||||
top_right = _mm_mul_ps(top_right, x_d);
|
|
||||||
top_left = _mm_add_ps(top_left, top_right);
|
|
||||||
|
|
||||||
__m128 bottom_left = _mm_set_ps(src.buffer[src_index_3], src.buffer[src_index_3 + 1], src.buffer[src_index_3 + 2], 0.0);
|
// No SIMD
|
||||||
__m128 bottom_right = _mm_set_ps(src.buffer[src_index_4], src.buffer[src_index_4 + 1], src.buffer[src_index_4 + 2], 0.0);
|
rotate_buffer[rot_index] = (src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y
|
||||||
bottom_left = _mm_mul_ps(bottom_left, inv_x_d);
|
+ (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta;
|
||||||
bottom_right = _mm_mul_ps(bottom_right, x_d);
|
rotate_buffer[rot_index + 1] = (src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
|
||||||
bottom_left = _mm_add_ps(bottom_left, bottom_right);
|
+ (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta;
|
||||||
|
rotate_buffer[rot_index + 2] = (src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
|
||||||
__m128 const y_d = _mm_set_ps1(y_delta);
|
+ (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta;
|
||||||
__m128 const inv_y_d = _mm_set_ps1(1 - y_delta);
|
|
||||||
top_left = _mm_mul_ps(top_left, inv_y_d);
|
|
||||||
bottom_left = _mm_mul_ps(bottom_left, y_d);
|
|
||||||
top_left = _mm_add_ps(top_left, bottom_left);
|
|
||||||
|
|
||||||
// convert float values to uint8_t
|
|
||||||
rotate_buffer[rot_index] = top_left[3];
|
|
||||||
rotate_buffer[rot_index + 1] = top_left[2];
|
|
||||||
rotate_buffer[rot_index + 2] = top_left[1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Image* rotate(Image const& src, double angle)
|
Image* rotate(Image const& src, double angle)
|
||||||
|
@ -689,6 +673,12 @@ Image* rotate(Image const& src, double angle)
|
||||||
src_delta_y.y = src_delta_y.y - src_origin.y;
|
src_delta_y.y = src_delta_y.y - src_origin.y;
|
||||||
round_if_very_small(src_delta_y.x);
|
round_if_very_small(src_delta_y.x);
|
||||||
round_if_very_small(src_delta_y.y);
|
round_if_very_small(src_delta_y.y);
|
||||||
|
bool full_delta = false;
|
||||||
|
if (src_delta_x.x - (int) src_delta_x.x == 0
|
||||||
|
&& src_delta_x.y - (int) src_delta_x.y == 0
|
||||||
|
&& src_delta_y.x - (int) src_delta_y.x == 0
|
||||||
|
&& src_delta_y.y - (int) src_delta_y.y == 0)
|
||||||
|
full_delta = true;
|
||||||
|
|
||||||
|
|
||||||
// // steps for first column in source image (y)
|
// // steps for first column in source image (y)
|
||||||
|
@ -705,57 +695,19 @@ Image* rotate(Image const& src, double angle)
|
||||||
unsigned int const src_limit = src.width * src.height * 3;
|
unsigned int const src_limit = src.width * src.height * 3;
|
||||||
unsigned int const rot_limit = rotated->width * rotated->height * 3;
|
unsigned int const rot_limit = rotated->width * rotated->height * 3;
|
||||||
|
|
||||||
// for (int y_i = 0; y_i <= (int) origin_nb_steps; ++y_i)
|
|
||||||
// {
|
|
||||||
// // first column origin
|
|
||||||
// Point const rot_origin(tl.x + y_i * rotated_step.x, tl.y + y_i * rotated_step.y);
|
|
||||||
// Point rot_point(rot_origin.x, rot_origin.y);
|
|
||||||
// DPoint rot_delta(0.0, 0.0);
|
|
||||||
//
|
|
||||||
// Point previous = rot_origin;
|
|
||||||
//
|
|
||||||
// for (int x_i = 0; x_i <= (int) line_nb_steps; ++x_i)
|
|
||||||
// {
|
|
||||||
//
|
|
||||||
// Point const delta(rot_point.x - tl.x, rot_point.y - tl.y);
|
|
||||||
// DPoint src_rotated_point(src_tl.x + delta.x * src_delta_x.x + delta.y * src_delta_y.x,
|
|
||||||
// src_tl.y + delta.x * src_delta_x.y + delta.y * src_delta_y.y);
|
|
||||||
//
|
|
||||||
// rotate_pixel(src, *rotated, src_rotated_point, rot_point, src_limit, rot_limit);
|
|
||||||
//
|
|
||||||
// if (previous.x != rot_point.x && previous.y != rot_point.y)
|
|
||||||
// {
|
|
||||||
// int y_slope = rot_point.y > previous.y ? 1 : -1;
|
|
||||||
// int tmp_y = rot_point.y;
|
|
||||||
// rot_point.y = previous.y;
|
|
||||||
//
|
|
||||||
// src_rotated_point.x -= y_slope * src_delta_y.x;
|
|
||||||
// src_rotated_point.y -= y_slope * src_delta_y.y;
|
|
||||||
//
|
|
||||||
// rotate_pixel(src, *rotated, src_rotated_point, rot_point, src_limit, rot_limit);
|
|
||||||
//
|
|
||||||
// rot_point.y = tmp_y;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// previous = rot_point;
|
|
||||||
//
|
|
||||||
// rot_delta.x += bresenham.x;
|
|
||||||
// rot_point.x = rot_origin.x + (int) rot_delta.x;
|
|
||||||
//
|
|
||||||
// rot_delta.y += bresenham.y;
|
|
||||||
// rot_point.y = rot_origin.y + (int) rot_delta.y;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
|
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
|
||||||
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
|
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
|
||||||
|
|
||||||
unsigned int const buffer_size = rotated->width * 3;
|
unsigned int const buffer_pixel_capacity = 128 / 3;
|
||||||
|
unsigned int const buffer_size = buffer_pixel_capacity * 3;
|
||||||
|
unsigned int buffer_index = 0;
|
||||||
uint8_t buffer[buffer_size];
|
uint8_t buffer[buffer_size];
|
||||||
|
memset(buffer, 0, buffer_size);
|
||||||
|
unsigned int buffer_offset = 0;
|
||||||
|
|
||||||
for (unsigned int y = 0; y < rotated->height; ++y)
|
for (unsigned int y = 0; y < rotated->height; ++y)
|
||||||
{
|
{
|
||||||
memset(buffer, 0, buffer_size);
|
//memset(buffer, 0, buffer_size);
|
||||||
DPoint src_rotated_point(rot_origin_in_src.x + y * src_delta_y.x,
|
DPoint src_rotated_point(rot_origin_in_src.x + y * src_delta_y.x,
|
||||||
rot_origin_in_src.y + y * src_delta_y.y);
|
rot_origin_in_src.y + y * src_delta_y.y);
|
||||||
|
|
||||||
|
@ -764,13 +716,22 @@ Image* rotate(Image const& src, double angle)
|
||||||
if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width
|
if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width
|
||||||
&& src_rotated_point.y >= 0 && src_rotated_point.y < src.height)
|
&& src_rotated_point.y >= 0 && src_rotated_point.y < src.height)
|
||||||
{
|
{
|
||||||
rotate_pixel(src, src_rotated_point, src_limit, rot_limit, buffer, x * 3);
|
rotate_pixel(src, src_rotated_point,
|
||||||
|
src_limit, rot_limit,
|
||||||
|
buffer, buffer_index * 3, full_delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
src_rotated_point.x += src_delta_x.x;
|
src_rotated_point.x += src_delta_x.x;
|
||||||
src_rotated_point.y += src_delta_x.y;
|
src_rotated_point.y += src_delta_x.y;
|
||||||
|
++buffer_index;
|
||||||
|
if (buffer_index == buffer_pixel_capacity)
|
||||||
|
{
|
||||||
|
memcpy(rotated->buffer + buffer_offset, buffer, buffer_size);
|
||||||
|
buffer_offset += buffer_size;
|
||||||
|
buffer_index = 0;
|
||||||
|
memset(buffer, 0, buffer_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
memcpy(rotated->buffer + buffer_size * y, buffer, buffer_size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return rotated;
|
return rotated;
|
||||||
|
@ -784,55 +745,42 @@ Image* rotate(Image const& src, double angle)
|
||||||
template<unsigned int W, unsigned int H>
|
template<unsigned int W, unsigned int H>
|
||||||
void rotate_pixel(TiledImage<W, H> const& src,
|
void rotate_pixel(TiledImage<W, H> const& src,
|
||||||
DPoint const& src_rotated_point,
|
DPoint const& src_rotated_point,
|
||||||
uint8_t* rot_tile, unsigned int rot_index)
|
uint8_t* rot_tile, unsigned int rot_index,
|
||||||
|
bool full_delta)
|
||||||
{
|
{
|
||||||
uint8_t const* src_index_1 = src.access_pixel((int) src_rotated_point.x, (int) src_rotated_point.y);
|
int const src_x = (int) src_rotated_point.x;
|
||||||
|
int const src_y = (int) src_rotated_point.y;
|
||||||
double x_delta = src_rotated_point.x - (int) src_rotated_point.x;
|
uint8_t const* src_index_1 = src.access_pixel(src_x, src_y);
|
||||||
round_if_very_small(x_delta);
|
|
||||||
double y_delta = src_rotated_point.y - (int) src_rotated_point.y;
|
|
||||||
round_if_very_small(y_delta);
|
|
||||||
|
|
||||||
// special case if we can directly map the src to the dest
|
// special case if we can directly map the src to the dest
|
||||||
if (x_delta == 0 && y_delta == 0)
|
if (full_delta)
|
||||||
{
|
{
|
||||||
memcpy(&rot_tile[rot_index], src_index_1, 3 * sizeof (uint8_t));
|
memcpy(&rot_tile[rot_index], src_index_1, 3 * sizeof (uint8_t));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t const* src_index_2 = src.access_pixel((int) src_rotated_point.x + 1, (int) src_rotated_point.y);
|
double x_delta = src_rotated_point.x - src_x;
|
||||||
uint8_t const* src_index_3 = src.access_pixel((int) src_rotated_point.x, (int) src_rotated_point.y + 1);
|
//round_if_very_small(x_delta);
|
||||||
uint8_t const* src_index_4 = src.access_pixel((int) src_rotated_point.x + 1, (int) src_rotated_point.y + 1);
|
double y_delta = src_rotated_point.y - src_y;
|
||||||
|
//round_if_very_small(y_delta);
|
||||||
|
double const inv_x = 1 - x_delta;
|
||||||
|
double const inv_y = 1 - y_delta;
|
||||||
|
|
||||||
|
uint8_t const* src_index_2 = src.access_pixel(src_x + 1, src_y);
|
||||||
|
uint8_t const* src_index_3 = src.access_pixel(src_x, src_y + 1);
|
||||||
|
uint8_t const* src_index_4 = src.access_pixel(src_x + 1, src_y + 1);
|
||||||
|
|
||||||
// FIXME: deal with image border
|
// FIXME: deal with image border
|
||||||
if (!src_index_1 || !src_index_2 || !src_index_3 || !src_index_4)
|
if (!src_index_4)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// SIMD
|
// No SIMD
|
||||||
__m128 const x_d = _mm_set_ps1(x_delta);
|
rot_tile[rot_index] = (src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y
|
||||||
__m128 const inv_x_d = _mm_set_ps1(1 - x_delta);
|
+ (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta;
|
||||||
__m128 top_left = _mm_set_ps(*src_index_1, *(src_index_1 + 1), *(src_index_1 + 2), 0.0);
|
rot_tile[rot_index + 1] = (src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y
|
||||||
__m128 top_right = _mm_set_ps(*src_index_2, *(src_index_2 + 1), *(src_index_2 + 2), 0.0);
|
+ (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta;
|
||||||
top_left = _mm_mul_ps(top_left, inv_x_d);
|
rot_tile[rot_index + 2] = (src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y
|
||||||
top_right = _mm_mul_ps(top_right, x_d);
|
+ (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta;
|
||||||
top_left = _mm_add_ps(top_left, top_right);
|
|
||||||
|
|
||||||
__m128 bottom_left = _mm_set_ps(*src_index_3, *(src_index_3 + 1), *(src_index_3 + 2), 0.0);
|
|
||||||
__m128 bottom_right = _mm_set_ps(*src_index_4, *(src_index_4 + 1), *(src_index_4 + 2), 0.0);
|
|
||||||
bottom_left = _mm_mul_ps(bottom_left, inv_x_d);
|
|
||||||
bottom_right = _mm_mul_ps(bottom_right, x_d);
|
|
||||||
bottom_left = _mm_add_ps(bottom_left, bottom_right);
|
|
||||||
|
|
||||||
__m128 const y_d = _mm_set_ps1(y_delta);
|
|
||||||
__m128 const inv_y_d = _mm_set_ps1(1 - y_delta);
|
|
||||||
top_left = _mm_mul_ps(top_left, inv_y_d);
|
|
||||||
bottom_left = _mm_mul_ps(bottom_left, y_d);
|
|
||||||
top_left = _mm_add_ps(top_left, bottom_left);
|
|
||||||
|
|
||||||
// convert float values to uint8_t
|
|
||||||
rot_tile[rot_index] = top_left[3];
|
|
||||||
rot_tile[rot_index + 1] = top_left[2];
|
|
||||||
rot_tile[rot_index + 2] = top_left[1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<unsigned int W, unsigned int H>
|
template<unsigned int W, unsigned int H>
|
||||||
|
@ -857,15 +805,26 @@ rotate(TiledImage<W, H> const& src, double angle)
|
||||||
src_delta_y.y = src_delta_y.y - src_origin.y;
|
src_delta_y.y = src_delta_y.y - src_origin.y;
|
||||||
round_if_very_small(src_delta_y.x);
|
round_if_very_small(src_delta_y.x);
|
||||||
round_if_very_small(src_delta_y.y);
|
round_if_very_small(src_delta_y.y);
|
||||||
|
bool full_delta = false;
|
||||||
|
if (src_delta_x.x - (int) src_delta_x.x == 0
|
||||||
|
&& src_delta_x.y - (int) src_delta_x.y == 0
|
||||||
|
&& src_delta_y.x - (int) src_delta_y.x == 0
|
||||||
|
&& src_delta_y.y - (int) src_delta_y.y == 0)
|
||||||
|
full_delta = true;
|
||||||
|
|
||||||
|
|
||||||
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
|
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
|
||||||
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
|
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
|
||||||
|
|
||||||
|
uint8_t tile[W * H * 3];
|
||||||
|
memset(tile, 0, W * H * 3);
|
||||||
|
|
||||||
for (unsigned int y = 0; y < rotated->nb_row_tile; ++y)
|
for (unsigned int y = 0; y < rotated->nb_row_tile; ++y)
|
||||||
{
|
{
|
||||||
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
|
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
|
||||||
{
|
{
|
||||||
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
|
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
|
||||||
|
unsigned int rot_index = 0;
|
||||||
|
|
||||||
for (unsigned int j = 0; j < H; ++j)
|
for (unsigned int j = 0; j < H; ++j)
|
||||||
{
|
{
|
||||||
|
@ -876,20 +835,21 @@ rotate(TiledImage<W, H> const& src, double angle)
|
||||||
|
|
||||||
for (unsigned int i = 0; i < W; ++i)
|
for (unsigned int i = 0; i < W; ++i)
|
||||||
{
|
{
|
||||||
unsigned int const rot_index = (j * W + i) * 3;
|
|
||||||
|
|
||||||
if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width
|
if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width
|
||||||
&& src_rotated_point.y >= 0 && src_rotated_point.y < src.height)
|
&& src_rotated_point.y >= 0 && src_rotated_point.y < src.height)
|
||||||
{
|
{
|
||||||
rotate_pixel(src, src_rotated_point,
|
rotate_pixel(src, src_rotated_point,
|
||||||
rotated->tiles[rot_tile_index], rot_index);
|
tile, rot_index, full_delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
src_rotated_point.x += src_delta_x.x;
|
src_rotated_point.x += src_delta_x.x;
|
||||||
src_rotated_point.y += src_delta_x.y;
|
src_rotated_point.y += src_delta_x.y;
|
||||||
|
rot_index += 3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
memcpy(rotated->tiles[rot_tile_index], tile, W * H * 3);
|
||||||
|
memset(tile, 0, W * H * 3);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1059,7 +1019,7 @@ int main(int argc, char* argv[])
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool perform_check = true;
|
bool perform_check = false;
|
||||||
|
|
||||||
if (perform_check)
|
if (perform_check)
|
||||||
{
|
{
|
||||||
|
@ -1077,7 +1037,7 @@ int main(int argc, char* argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
Image img(argv[1]);
|
Image img(argv[1]);
|
||||||
TiledImage<32, 32> tiled_img(argv[1]);
|
TiledImage<8, 8> tiled_img(argv[1]);
|
||||||
|
|
||||||
for (double rotation = 0; rotation < 360; rotation += 45)
|
for (double rotation = 0; rotation < 360; rotation += 45)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue