diff --git a/rotation.cpp b/rotation.cpp index a46e9f7..2377dd8 100644 --- a/rotation.cpp +++ b/rotation.cpp @@ -595,9 +595,12 @@ inline void rotate_pixel(Image const& src, /*uint8_t* rotated,*/ DPoint const& src_rotated_point, /*Point const& rot_point,*/ unsigned int const src_limit, unsigned int const rot_limit, - uint8_t* rotate_buffer, unsigned int rot_index) + uint8_t* rotate_buffer, unsigned int rot_index, + bool full_delta) { - unsigned int src_index = ((int) src_rotated_point.y * src.width + (int) src_rotated_point.x) * 3; + int const src_x = (int) src_rotated_point.x; + int const src_y = (int) src_rotated_point.y; + unsigned int src_index = (src_y * src.width + src_x) * 3; // unsigned int rot_index = (rot_point.y * rotated.width + rot_point.x) * 3; // Out-of-bounds check @@ -614,46 +617,27 @@ void rotate_pixel(Image const& src, /*uint8_t* rotated,*/ if (src_index_4 >= src_limit) return; - double x_delta = src_rotated_point.x - floor(src_rotated_point.x); - round_if_very_small(x_delta); - double y_delta = src_rotated_point.y - floor(src_rotated_point.y); - round_if_very_small(y_delta); - // special case if we can directly map the src to the dest - if (x_delta == 0 && y_delta == 0) + if (full_delta) { - // memcpy(&rotated.buffer[rot_index], &src.buffer[src_index], 3 * sizeof (uint8_t)); - rotate_buffer[rot_index] = src.buffer[src_index]; - rotate_buffer[rot_index + 1] = src.buffer[src_index + 1]; - rotate_buffer[rot_index + 2] = src.buffer[src_index + 2]; + memcpy(&rotate_buffer[rot_index], &src.buffer[src_index], 3 * sizeof (uint8_t)); return; } - // SIMD - __m128 const x_d = _mm_set_ps1(x_delta); - __m128 const inv_x_d = _mm_set_ps1(1 - x_delta); - __m128 top_left = _mm_set_ps(src.buffer[src_index_1], src.buffer[src_index_1 + 1], src.buffer[src_index_1 + 2], 0.0); - __m128 top_right = _mm_set_ps(src.buffer[src_index_2], src.buffer[src_index_2 + 1], src.buffer[src_index_2 + 2], 0.0); - top_left = _mm_mul_ps(top_left, inv_x_d); - top_right = _mm_mul_ps(top_right, x_d); - top_left = _mm_add_ps(top_left, top_right); + double x_delta = src_rotated_point.x - src_x; + //round_if_very_small(x_delta); + double y_delta = src_rotated_point.y - src_y; + //round_if_very_small(y_delta); + double const inv_x = 1 - x_delta; + double const inv_y = 1 - y_delta; - __m128 bottom_left = _mm_set_ps(src.buffer[src_index_3], src.buffer[src_index_3 + 1], src.buffer[src_index_3 + 2], 0.0); - __m128 bottom_right = _mm_set_ps(src.buffer[src_index_4], src.buffer[src_index_4 + 1], src.buffer[src_index_4 + 2], 0.0); - bottom_left = _mm_mul_ps(bottom_left, inv_x_d); - bottom_right = _mm_mul_ps(bottom_right, x_d); - bottom_left = _mm_add_ps(bottom_left, bottom_right); - - __m128 const y_d = _mm_set_ps1(y_delta); - __m128 const inv_y_d = _mm_set_ps1(1 - y_delta); - top_left = _mm_mul_ps(top_left, inv_y_d); - bottom_left = _mm_mul_ps(bottom_left, y_d); - top_left = _mm_add_ps(top_left, bottom_left); - - // convert float values to uint8_t - rotate_buffer[rot_index] = top_left[3]; - rotate_buffer[rot_index + 1] = top_left[2]; - rotate_buffer[rot_index + 2] = top_left[1]; + // No SIMD + rotate_buffer[rot_index] = (src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y + + (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta; + rotate_buffer[rot_index + 1] = (src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y + + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta; + rotate_buffer[rot_index + 2] = (src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y + + (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta; } Image* rotate(Image const& src, double angle) @@ -689,6 +673,12 @@ Image* rotate(Image const& src, double angle) src_delta_y.y = src_delta_y.y - src_origin.y; round_if_very_small(src_delta_y.x); round_if_very_small(src_delta_y.y); + bool full_delta = false; + if (src_delta_x.x - (int) src_delta_x.x == 0 + && src_delta_x.y - (int) src_delta_x.y == 0 + && src_delta_y.x - (int) src_delta_y.x == 0 + && src_delta_y.y - (int) src_delta_y.y == 0) + full_delta = true; // // steps for first column in source image (y) @@ -705,57 +695,19 @@ Image* rotate(Image const& src, double angle) unsigned int const src_limit = src.width * src.height * 3; unsigned int const rot_limit = rotated->width * rotated->height * 3; -// for (int y_i = 0; y_i <= (int) origin_nb_steps; ++y_i) -// { -// // first column origin -// Point const rot_origin(tl.x + y_i * rotated_step.x, tl.y + y_i * rotated_step.y); -// Point rot_point(rot_origin.x, rot_origin.y); -// DPoint rot_delta(0.0, 0.0); -// -// Point previous = rot_origin; -// -// for (int x_i = 0; x_i <= (int) line_nb_steps; ++x_i) -// { -// -// Point const delta(rot_point.x - tl.x, rot_point.y - tl.y); -// DPoint src_rotated_point(src_tl.x + delta.x * src_delta_x.x + delta.y * src_delta_y.x, -// src_tl.y + delta.x * src_delta_x.y + delta.y * src_delta_y.y); -// -// rotate_pixel(src, *rotated, src_rotated_point, rot_point, src_limit, rot_limit); -// -// if (previous.x != rot_point.x && previous.y != rot_point.y) -// { -// int y_slope = rot_point.y > previous.y ? 1 : -1; -// int tmp_y = rot_point.y; -// rot_point.y = previous.y; -// -// src_rotated_point.x -= y_slope * src_delta_y.x; -// src_rotated_point.y -= y_slope * src_delta_y.y; -// -// rotate_pixel(src, *rotated, src_rotated_point, rot_point, src_limit, rot_limit); -// -// rot_point.y = tmp_y; -// } -// -// previous = rot_point; -// -// rot_delta.x += bresenham.x; -// rot_point.x = rot_origin.x + (int) rot_delta.x; -// -// rot_delta.y += bresenham.y; -// rot_point.y = rot_origin.y + (int) rot_delta.y; -// } -// } - DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation); DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid); - unsigned int const buffer_size = rotated->width * 3; + unsigned int const buffer_pixel_capacity = 128 / 3; + unsigned int const buffer_size = buffer_pixel_capacity * 3; + unsigned int buffer_index = 0; uint8_t buffer[buffer_size]; + memset(buffer, 0, buffer_size); + unsigned int buffer_offset = 0; for (unsigned int y = 0; y < rotated->height; ++y) { - memset(buffer, 0, buffer_size); + //memset(buffer, 0, buffer_size); DPoint src_rotated_point(rot_origin_in_src.x + y * src_delta_y.x, rot_origin_in_src.y + y * src_delta_y.y); @@ -764,13 +716,22 @@ Image* rotate(Image const& src, double angle) if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width && src_rotated_point.y >= 0 && src_rotated_point.y < src.height) { - rotate_pixel(src, src_rotated_point, src_limit, rot_limit, buffer, x * 3); + rotate_pixel(src, src_rotated_point, + src_limit, rot_limit, + buffer, buffer_index * 3, full_delta); } src_rotated_point.x += src_delta_x.x; src_rotated_point.y += src_delta_x.y; + ++buffer_index; + if (buffer_index == buffer_pixel_capacity) + { + memcpy(rotated->buffer + buffer_offset, buffer, buffer_size); + buffer_offset += buffer_size; + buffer_index = 0; + memset(buffer, 0, buffer_size); + } } - memcpy(rotated->buffer + buffer_size * y, buffer, buffer_size); } return rotated; @@ -784,55 +745,42 @@ Image* rotate(Image const& src, double angle) template void rotate_pixel(TiledImage const& src, DPoint const& src_rotated_point, - uint8_t* rot_tile, unsigned int rot_index) + uint8_t* rot_tile, unsigned int rot_index, + bool full_delta) { - uint8_t const* src_index_1 = src.access_pixel((int) src_rotated_point.x, (int) src_rotated_point.y); - - double x_delta = src_rotated_point.x - (int) src_rotated_point.x; - round_if_very_small(x_delta); - double y_delta = src_rotated_point.y - (int) src_rotated_point.y; - round_if_very_small(y_delta); + int const src_x = (int) src_rotated_point.x; + int const src_y = (int) src_rotated_point.y; + uint8_t const* src_index_1 = src.access_pixel(src_x, src_y); // special case if we can directly map the src to the dest - if (x_delta == 0 && y_delta == 0) + if (full_delta) { memcpy(&rot_tile[rot_index], src_index_1, 3 * sizeof (uint8_t)); return; } - uint8_t const* src_index_2 = src.access_pixel((int) src_rotated_point.x + 1, (int) src_rotated_point.y); - uint8_t const* src_index_3 = src.access_pixel((int) src_rotated_point.x, (int) src_rotated_point.y + 1); - uint8_t const* src_index_4 = src.access_pixel((int) src_rotated_point.x + 1, (int) src_rotated_point.y + 1); + double x_delta = src_rotated_point.x - src_x; + //round_if_very_small(x_delta); + double y_delta = src_rotated_point.y - src_y; + //round_if_very_small(y_delta); + double const inv_x = 1 - x_delta; + double const inv_y = 1 - y_delta; + + uint8_t const* src_index_2 = src.access_pixel(src_x + 1, src_y); + uint8_t const* src_index_3 = src.access_pixel(src_x, src_y + 1); + uint8_t const* src_index_4 = src.access_pixel(src_x + 1, src_y + 1); // FIXME: deal with image border - if (!src_index_1 || !src_index_2 || !src_index_3 || !src_index_4) + if (!src_index_4) return; - // SIMD - __m128 const x_d = _mm_set_ps1(x_delta); - __m128 const inv_x_d = _mm_set_ps1(1 - x_delta); - __m128 top_left = _mm_set_ps(*src_index_1, *(src_index_1 + 1), *(src_index_1 + 2), 0.0); - __m128 top_right = _mm_set_ps(*src_index_2, *(src_index_2 + 1), *(src_index_2 + 2), 0.0); - top_left = _mm_mul_ps(top_left, inv_x_d); - top_right = _mm_mul_ps(top_right, x_d); - top_left = _mm_add_ps(top_left, top_right); - - __m128 bottom_left = _mm_set_ps(*src_index_3, *(src_index_3 + 1), *(src_index_3 + 2), 0.0); - __m128 bottom_right = _mm_set_ps(*src_index_4, *(src_index_4 + 1), *(src_index_4 + 2), 0.0); - bottom_left = _mm_mul_ps(bottom_left, inv_x_d); - bottom_right = _mm_mul_ps(bottom_right, x_d); - bottom_left = _mm_add_ps(bottom_left, bottom_right); - - __m128 const y_d = _mm_set_ps1(y_delta); - __m128 const inv_y_d = _mm_set_ps1(1 - y_delta); - top_left = _mm_mul_ps(top_left, inv_y_d); - bottom_left = _mm_mul_ps(bottom_left, y_d); - top_left = _mm_add_ps(top_left, bottom_left); - - // convert float values to uint8_t - rot_tile[rot_index] = top_left[3]; - rot_tile[rot_index + 1] = top_left[2]; - rot_tile[rot_index + 2] = top_left[1]; + // No SIMD + rot_tile[rot_index] = (src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y + + (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta; + rot_tile[rot_index + 1] = (src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y + + (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta; + rot_tile[rot_index + 2] = (src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y + + (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta; } template @@ -857,15 +805,26 @@ rotate(TiledImage const& src, double angle) src_delta_y.y = src_delta_y.y - src_origin.y; round_if_very_small(src_delta_y.x); round_if_very_small(src_delta_y.y); + bool full_delta = false; + if (src_delta_x.x - (int) src_delta_x.x == 0 + && src_delta_x.y - (int) src_delta_x.y == 0 + && src_delta_y.x - (int) src_delta_y.x == 0 + && src_delta_y.y - (int) src_delta_y.y == 0) + full_delta = true; + DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation); DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid); + uint8_t tile[W * H * 3]; + memset(tile, 0, W * H * 3); + for (unsigned int y = 0; y < rotated->nb_row_tile; ++y) { for (unsigned int x = 0; x < rotated->nb_col_tile; ++x) { unsigned int const rot_tile_index = y * rotated->nb_col_tile + x; + unsigned int rot_index = 0; for (unsigned int j = 0; j < H; ++j) { @@ -876,20 +835,21 @@ rotate(TiledImage const& src, double angle) for (unsigned int i = 0; i < W; ++i) { - unsigned int const rot_index = (j * W + i) * 3; - if (src_rotated_point.x >= 0 && src_rotated_point.x < src.width && src_rotated_point.y >= 0 && src_rotated_point.y < src.height) { rotate_pixel(src, src_rotated_point, - rotated->tiles[rot_tile_index], rot_index); + tile, rot_index, full_delta); } src_rotated_point.x += src_delta_x.x; src_rotated_point.y += src_delta_x.y; - + rot_index += 3; } } + + memcpy(rotated->tiles[rot_tile_index], tile, W * H * 3); + memset(tile, 0, W * H * 3); } } @@ -1059,7 +1019,7 @@ int main(int argc, char* argv[]) return 1; } - bool perform_check = true; + bool perform_check = false; if (perform_check) { @@ -1077,7 +1037,7 @@ int main(int argc, char* argv[]) } Image img(argv[1]); - TiledImage<32, 32> tiled_img(argv[1]); + TiledImage<8, 8> tiled_img(argv[1]); for (double rotation = 0; rotation < 360; rotation += 45) {