diff --git a/Makefile b/Makefile index 46fdf7e..20ec108 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,16 @@ CXX = clang++ CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math -Werror -g BUILD_DIR=/tmp +IMG=img/lena.ppm all: rotation.cpp $(CXX) $(CXXFLAGS) $< -o $(BUILD_DIR)/rotation clean: - @rm -f *~ *.o .*.swp *.ppm + @rm -f *~ *.o .*.swp *.ppm cachegrind.out.* run: all - $(BUILD_DIR)/rotation img/lena.ppm + $(BUILD_DIR)/rotation $(IMG) cachegrind: all - valgrind --tool=cachegrind $(BUILD_DIR)/rotation img/lena.ppm + valgrind --tool=cachegrind $(BUILD_DIR)/rotation $(IMG) diff --git a/TODO.md b/TODO.md index e6b08f3..6ac1571 100644 --- a/TODO.md +++ b/TODO.md @@ -7,16 +7,18 @@ [ ] Optimization for square images? [X] Fixed point computation? +[-] -funroll-loops # Cache [-] Rotate per channel -> no gain [ ] Load pixels in 64-bit variable [X] Cut image in tiles - [ ] Overlap? + [X] Overlap [-] Rotate in one temp tile then copy/move it [X] Align tiles in memory [-] Align memory -> no gain [ ] RGBX format +[ ] Spiral layout? # Quality [X] Interpolate using SIMD, SSE (no big gain) diff --git a/rotation.cpp b/rotation.cpp index f99caf4..f954899 100644 --- a/rotation.cpp +++ b/rotation.cpp @@ -249,7 +249,9 @@ struct TiledImage : public Image { uint8_t* tiles; unsigned int static const tile_w = W; unsigned int static const tile_h = H; - unsigned int static const tile_size = W * H; + + // two lines overlap, bottom + right + unsigned int static const tile_size = (W + 1) * (H + 1); unsigned int nb_col_tile; unsigned int nb_row_tile; @@ -294,21 +296,6 @@ struct TiledImage : public Image { } } - uint8_t* - access_pixel(unsigned int x, unsigned int y) - { - if (x >= width || y >= height) - return nullptr; - - unsigned int const tile_width = tile_w * 3; - - unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w); - uint8_t* tile = tiles + tile_index * tile_size * 3; - unsigned int const tile_j = y % tile_h; - unsigned int const tile_i = x % tile_w; - return tile + tile_j * tile_width + (tile_i * 3); - } - uint8_t const* get_tile(unsigned int index) const { @@ -327,13 +314,28 @@ struct TiledImage : public Image { return tiles + index * tile_size * 3; } + uint8_t* + access_pixel(unsigned int x, unsigned int y) + { + if (x >= width || y >= height) + return nullptr; + + unsigned int const tile_width = (tile_w + 1) * 3; + + unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w); + uint8_t* tile = this->get_tile(tile_index); + unsigned int const tile_j = y % tile_h; + unsigned int const tile_i = x % tile_w; + return tile + tile_j * tile_width + (tile_i * 3); + } + uint8_t const* access_pixel(unsigned int x, unsigned int y) const { if (x >= width || y >= height) return nullptr; - unsigned int const tile_width = tile_w * 3; + unsigned int const tile_width = (tile_w + 1) * 3; unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w); const uint8_t* tile = this->get_tile(tile_index); @@ -372,10 +374,10 @@ struct TiledImage : public Image { { cout << "Tile[" << index << "]" << endl; uint8_t const* tile = this->get_tile(index); - unsigned int const tile_width = tile_w * 3; - for (unsigned int j = 0; j < tile_h; ++j) + unsigned int const tile_width = (tile_w + 1) * 3; + for (unsigned int j = 0; j < tile_h + 1; ++j) { - for (unsigned int i = 0; i < tile_w; ++i) + for (unsigned int i = 0; i < tile_w + 1; ++i) { if (i != 0) cout << ", "; @@ -388,6 +390,38 @@ struct TiledImage : public Image { cout << endl; } + void fill_overlap() + { + unsigned int const tile_width = (W + 1) * 3; + + for (int j = nb_row_tile - 1; j >= 0; --j) + for (unsigned int i = 0; i < nb_col_tile; ++i) + { + // copy last line overlap + if (j != (int) nb_row_tile - 1) + { + uint8_t const* tile_src = this->access_pixel(i * W, (j + 1) * H); + uint8_t* tile_dst = this->access_pixel(i * W, j * H); + tile_dst += H * tile_width; + memcpy(tile_dst, tile_src, tile_width * sizeof (uint8_t)); + } + + // copy last col overlap + if (i != nb_col_tile - 1) + { + uint8_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile); + uint8_t* tile_dst = this->get_tile(i + j * nb_col_tile); + tile_dst += W * 3; + for (unsigned int y = 0; y < H; ++y) + { + memcpy(tile_dst, tile_src, 3 * sizeof (uint8_t)); + tile_src += tile_width; + tile_dst += tile_width; + } + } + } + } + protected: void insert_pixel(PackedPixel& pack, unsigned int x, unsigned int y) const @@ -423,7 +457,7 @@ struct TiledImage : public Image { memset(tiles, 0, nb_tiles * tile_size * 3 * sizeof (uint8_t)); } - virtual bool read_body(std::ifstream& istr) + virtual bool read_body(std::ifstream& istr) override { this->allocate_memory(width, height); @@ -437,6 +471,8 @@ struct TiledImage : public Image { *(tile++) = istr.get(); } + this->fill_overlap(); + return true; } @@ -720,9 +756,9 @@ void rotate_pixel(TiledImage const& src, int const src_y = src_rotated_point.y >> 3; uint8_t const* src_index_1 = src.access_pixel(src_x, src_y); - uint8_t const* src_index_2 = src.access_pixel(src_x + 1, src_y); - uint8_t const* src_index_3 = src.access_pixel(src_x, src_y + 1); - uint8_t const* src_index_4 = src.access_pixel(src_x + 1, src_y + 1); + uint8_t const* src_index_2 = src_index_1 + 3; + uint8_t const* src_index_3 = src_index_1 + (W + 1) * 3; + uint8_t const* src_index_4 = src_index_3 + 3; // FIXME: deal with image border if (!src_index_4) @@ -799,10 +835,15 @@ rotate(TiledImage const& src, double angle) runner += 3; } + + // Jump overlapping pixel + runner += 3; } } } +// rotated->fill_overlap(); + return rotated; } @@ -1009,7 +1050,7 @@ int main(int argc, char* argv[]) cout << " average: " << average / i << "ms" << endl << endl; // Tile - TiledImage<8, 8> tiled_img(argv[1]); + TiledImage<16, 16> tiled_img(argv[1]); average = 0.0; i = 0; for (double rotation = 0; rotation < 360; rotation += 45)