From a992cba5ed6151930b3b44d8b51edd65d1959591 Mon Sep 17 00:00:00 2001 From: Fabien Freling Date: Wed, 16 Jul 2014 20:44:32 +0200 Subject: [PATCH] Implement RGBX for tiled images. Interpolation is now done with SIMD with -DSIMD for tiled images as well. --- TODO.md | 5 ++- rotation.cpp | 121 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 82 insertions(+), 44 deletions(-) diff --git a/TODO.md b/TODO.md index b576775..1b6507c 100644 --- a/TODO.md +++ b/TODO.md @@ -15,13 +15,14 @@ [X] Overlap [-] Rotate in one temp tile then copy/move it [X] Align tiles in memory +[ ] Touch beginning of tile ## Alignement [X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place) [X] Load pixels in 64-bit variable [X] Directly load in SIMD 128-bit variable [ ] Align memory on 16 bytes (would require padding) -[ ] RGBX tiles +[X] RGBX tiles ## Layout [ ] Pack 4 neighbors in 16B structure (aligned) @@ -29,5 +30,5 @@ [ ] Spiral layout? # Quality -[X] Interpolate using SIMD, SSE (no big gain, alignement problem?) +[X] Interpolate using SIMD, SSE [ ] Image borders diff --git a/rotation.cpp b/rotation.cpp index ce85872..6450367 100644 --- a/rotation.cpp +++ b/rotation.cpp @@ -240,7 +240,7 @@ struct Image { template struct TiledImage : public Image { - uint8_t* tiles; + pvalue_t* tiles; unsigned int static const tile_w = W; unsigned int static const tile_h = H; @@ -290,52 +290,52 @@ struct TiledImage : public Image { } } - uint8_t const* + pvalue_t const* get_tile(unsigned int index) const { if (index >= nb_col_tile * nb_row_tile) return nullptr; - return tiles + index * tile_size * 3; + return tiles + index * tile_size * PIXEL_SIZE; } - uint8_t* + pvalue_t* get_tile(unsigned int index) { if (index >= nb_col_tile * nb_row_tile) return nullptr; - return tiles + index * tile_size * 3; + return tiles + index * tile_size * PIXEL_SIZE; } - uint8_t* + pvalue_t* access_pixel(unsigned int x, unsigned int y) { if (x >= width || y >= height) return nullptr; - unsigned int const tile_width = (tile_w + 1) * 3; + unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE; unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w); - uint8_t* tile = this->get_tile(tile_index); + pvalue_t* tile = this->get_tile(tile_index); unsigned int const tile_j = y % tile_h; unsigned int const tile_i = x % tile_w; - return tile + tile_j * tile_width + (tile_i * 3); + return tile + tile_j * tile_width + (tile_i * PIXEL_SIZE); } - uint8_t const* + pvalue_t const* access_pixel(unsigned int x, unsigned int y) const { if (x >= width || y >= height) return nullptr; - unsigned int const tile_width = (tile_w + 1) * 3; + unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE; unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w); - const uint8_t* tile = this->get_tile(tile_index); + const pvalue_t* tile = this->get_tile(tile_index); unsigned int const tile_j = y % tile_h; unsigned int const tile_i = x % tile_w; - return tile + tile_j * tile_width + (tile_i * 3); + return tile + tile_j * tile_width + (tile_i * PIXEL_SIZE); } PackedPixel @@ -367,15 +367,15 @@ struct TiledImage : public Image { print_tile(unsigned int index) const { cout << "Tile[" << index << "]" << endl; - uint8_t const* tile = this->get_tile(index); - unsigned int const tile_width = (tile_w + 1) * 3; + pvalue_t const* tile = this->get_tile(index); + unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE; for (unsigned int j = 0; j < tile_h + 1; ++j) { for (unsigned int i = 0; i < tile_w + 1; ++i) { if (i != 0) cout << ", "; - uint8_t const* p = tile + j * tile_width + i * 3; + pvalue_t const* p = tile + j * tile_width + i * PIXEL_SIZE; cout << (int) *p << " " << (int) *(p + 1) << " " << (int) *(p + 2); } @@ -386,7 +386,7 @@ struct TiledImage : public Image { void fill_overlap() { - unsigned int const tile_width = (W + 1) * 3; + unsigned int const tile_width = (W + 1) * PIXEL_SIZE; for (int j = nb_row_tile - 1; j >= 0; --j) for (unsigned int i = 0; i < nb_col_tile; ++i) @@ -394,21 +394,21 @@ struct TiledImage : public Image { // copy last line overlap if (j != (int) nb_row_tile - 1) { - uint8_t const* tile_src = this->access_pixel(i * W, (j + 1) * H); - uint8_t* tile_dst = this->access_pixel(i * W, j * H); + pvalue_t const* tile_src = this->access_pixel(i * W, (j + 1) * H); + pvalue_t* tile_dst = this->access_pixel(i * W, j * H); tile_dst += H * tile_width; - memcpy(tile_dst, tile_src, tile_width * sizeof (uint8_t)); + memcpy(tile_dst, tile_src, tile_width * sizeof (pvalue_t)); } // copy last col overlap if (i != nb_col_tile - 1) { - uint8_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile); - uint8_t* tile_dst = this->get_tile(i + j * nb_col_tile); - tile_dst += W * 3; + pvalue_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile); + pvalue_t* tile_dst = this->get_tile(i + j * nb_col_tile); + tile_dst += W * PIXEL_SIZE; for (unsigned int y = 0; y < H; ++y) { - memcpy(tile_dst, tile_src, 3 * sizeof (uint8_t)); + memcpy(tile_dst, tile_src, PIXEL_SIZE * sizeof (pvalue_t)); tile_src += tile_width; tile_dst += tile_width; } @@ -447,8 +447,8 @@ struct TiledImage : public Image { ++nb_row_tile; unsigned int const nb_tiles = nb_col_tile * nb_row_tile; - tiles = new uint8_t[nb_tiles * tile_size * 3]; - memset(tiles, 0, nb_tiles * tile_size * 3 * sizeof (uint8_t)); + tiles = new pvalue_t[nb_tiles * tile_size * PIXEL_SIZE]; + memset(tiles, 0, nb_tiles * tile_size * PIXEL_SIZE * sizeof (pvalue_t)); } virtual bool read_body(std::ifstream& istr) override @@ -459,10 +459,11 @@ struct TiledImage : public Image { for (unsigned int j = 0; j < height; ++j) for (unsigned int i = 0; i < width; ++i) { - uint8_t* tile = this->access_pixel(i, j); + pvalue_t* tile = this->access_pixel(i, j); *(tile++) = istr.get(); *(tile++) = istr.get(); *(tile++) = istr.get(); + *(tile++) = 0; // padding } this->fill_overlap(); @@ -475,10 +476,11 @@ struct TiledImage : public Image { for (unsigned int j = 0; j < height; ++j) for (unsigned int i = 0; i < width; ++i) { - uint8_t const* tile = this->access_pixel(i, j); + pvalue_t const* tile = this->access_pixel(i, j); ostr << (char) *(tile++); ostr << (char) *(tile++); ostr << (char) *(tile++); + tile++; // padding } return true; @@ -769,17 +771,16 @@ Image* rotate(Image const& src, double angle) template void rotate_pixel(TiledImage const& src, Point const& src_rotated_point, - uint8_t* rot_tile) + pvalue_t* rot_tile) { unsigned int const quantize = 8; int const src_x = src_rotated_point.x >> 3; int const src_y = src_rotated_point.y >> 3; - uint8_t const* src_index_1 = src.access_pixel(src_x, src_y); - uint8_t const* src_index_2 = src_index_1 + 3; - uint8_t const* src_index_3 = src_index_1 + (W + 1) * 3; - uint8_t const* src_index_4 = src_index_3 + 3; + pvalue_t const* src_index_1 = src.access_pixel(src_x, src_y); + pvalue_t const* src_index_3 = src_index_1 + (W + 1) * PIXEL_SIZE; + pvalue_t const* src_index_4 = src_index_3 + PIXEL_SIZE; // FIXME: deal with image border if (!src_index_4) @@ -790,13 +791,41 @@ void rotate_pixel(TiledImage const& src, unsigned int const inv_x = quantize - x_delta; unsigned int const inv_y = quantize - y_delta; - // No SIMD +#ifndef SIMD + + pvalue_t const* src_index_2 = src_index_1 + PIXEL_SIZE; + rot_tile[0] = ((src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y + (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta) >> 6; rot_tile[1] = ((src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y + (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta) >> 6; rot_tile[2] = ((src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y + (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta) >> 6; + +#else + + // X-axis + __m128i top = _mm_loadu_si128((__m128i*) src_index_1); + __m128i bottom = _mm_loadu_si128((__m128i*) src_index_3); + __m128i coef = _mm_set_epi16(x_delta, x_delta, x_delta, x_delta, inv_x, inv_x, inv_x, inv_x); + top = _mm_mullo_epi16(top, coef); + bottom = _mm_mullo_epi16(bottom, coef); + + // Y-axis + coef = _mm_set1_epi16(inv_y); + top = _mm_mullo_epi16(top, coef); + coef = _mm_set1_epi16(y_delta); + bottom = _mm_mullo_epi16(bottom, coef); + top = _mm_add_epi16(top, bottom); + + top = _mm_srli_epi16(top, 6); + + rot_tile[0] = _mm_extract_epi16(top, 0) + _mm_extract_epi16(top, 4); + rot_tile[1] = _mm_extract_epi16(top, 1) + _mm_extract_epi16(top, 5); + rot_tile[2] = _mm_extract_epi16(top, 2) + _mm_extract_epi16(top, 6); + + +#endif // ! SIMD } template @@ -834,7 +863,7 @@ rotate(TiledImage const& src, double angle) for (unsigned int x = 0; x < rotated->nb_col_tile; ++x) { unsigned int const rot_tile_index = y * rotated->nb_col_tile + x; - uint8_t* runner = rotated->get_tile(rot_tile_index); + pvalue_t* runner = rotated->get_tile(rot_tile_index); for (unsigned int j = 0; j < H; ++j) { @@ -854,11 +883,11 @@ rotate(TiledImage const& src, double angle) rotate_pixel(src, src_runner, runner); } - runner += 3; + runner += PIXEL_SIZE; } // Jump overlapping pixel - runner += 3; + runner += PIXEL_SIZE; } } } @@ -1049,6 +1078,8 @@ int main(int argc, char* argv[]) } double const step = 15; + bool save_output_img = false; + bool print_each_run = false; // No tile Image img(argv[1]); @@ -1063,9 +1094,12 @@ int main(int argc, char* argv[]) auto const duration_ms = std::chrono::duration_cast(after - before); average += duration_ms.count(); - //cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl; + if (print_each_run) + cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl; + + if (save_output_img) + rotated->save(get_save_path("rotated", rotation)); - //rotated->save(get_save_path("rotated", rotation)); delete rotated; ++i; } @@ -1073,7 +1107,7 @@ int main(int argc, char* argv[]) cout << " average: " << average / i << "ms" << endl << endl; // Tile - TiledImage<16, 16> tiled_img(argv[1]); + TiledImage<32, 32> tiled_img(argv[1]); average = 0.0; i = 0; cout << "Tiled image" << endl; @@ -1085,9 +1119,12 @@ int main(int argc, char* argv[]) auto const duration_ms = std::chrono::duration_cast(after - before); average += duration_ms.count(); - //cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl; + if (print_each_run) + cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl; + + if (save_output_img) + rotated->save(get_save_path("rotated_tiled", rotation)); - //rotated->save(get_save_path("rotated_tiled", rotation)); delete rotated; ++i; }