Implement RGBX for tiled images.

Interpolation for tiled images is now also done with SIMD when the
code is built with -DSIMD.
This commit is contained in:
Fabien Freling 2014-07-16 20:44:32 +02:00
parent bcf16680ae
commit a992cba5ed
2 changed files with 82 additions and 44 deletions

View file

@ -15,13 +15,14 @@
[X] Overlap [X] Overlap
[-] Rotate in one temp tile then copy/move it [-] Rotate in one temp tile then copy/move it
[X] Align tiles in memory [X] Align tiles in memory
[ ] Touch beginning of tile
## Alignment ## Alignment
[X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place) [X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place)
[X] Load pixels in 64-bit variable [X] Load pixels in 64-bit variable
[X] Directly load in SIMD 128-bit variable [X] Directly load in SIMD 128-bit variable
[ ] Align memory on 16 bytes (would require padding) [ ] Align memory on 16 bytes (would require padding)
[ ] RGBX tiles [X] RGBX tiles
## Layout ## Layout
[ ] Pack 4 neighbors in 16B structure (aligned) [ ] Pack 4 neighbors in 16B structure (aligned)
@ -29,5 +30,5 @@
[ ] Spiral layout? [ ] Spiral layout?
# Quality # Quality
[X] Interpolate using SIMD, SSE (no big gain, alignment problem?) [X] Interpolate using SIMD, SSE
[ ] Image borders [ ] Image borders

View file

@ -240,7 +240,7 @@ struct Image {
template<unsigned int W, unsigned int H> template<unsigned int W, unsigned int H>
struct TiledImage : public Image { struct TiledImage : public Image {
uint8_t* tiles; pvalue_t* tiles;
unsigned int static const tile_w = W; unsigned int static const tile_w = W;
unsigned int static const tile_h = H; unsigned int static const tile_h = H;
@ -290,52 +290,52 @@ struct TiledImage : public Image {
} }
} }
uint8_t const* pvalue_t const*
get_tile(unsigned int index) const get_tile(unsigned int index) const
{ {
if (index >= nb_col_tile * nb_row_tile) if (index >= nb_col_tile * nb_row_tile)
return nullptr; return nullptr;
return tiles + index * tile_size * 3; return tiles + index * tile_size * PIXEL_SIZE;
} }
uint8_t* pvalue_t*
get_tile(unsigned int index) get_tile(unsigned int index)
{ {
if (index >= nb_col_tile * nb_row_tile) if (index >= nb_col_tile * nb_row_tile)
return nullptr; return nullptr;
return tiles + index * tile_size * 3; return tiles + index * tile_size * PIXEL_SIZE;
} }
uint8_t* pvalue_t*
access_pixel(unsigned int x, unsigned int y) access_pixel(unsigned int x, unsigned int y)
{ {
if (x >= width || y >= height) if (x >= width || y >= height)
return nullptr; return nullptr;
unsigned int const tile_width = (tile_w + 1) * 3; unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w); unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w);
uint8_t* tile = this->get_tile(tile_index); pvalue_t* tile = this->get_tile(tile_index);
unsigned int const tile_j = y % tile_h; unsigned int const tile_j = y % tile_h;
unsigned int const tile_i = x % tile_w; unsigned int const tile_i = x % tile_w;
return tile + tile_j * tile_width + (tile_i * 3); return tile + tile_j * tile_width + (tile_i * PIXEL_SIZE);
} }
uint8_t const* pvalue_t const*
access_pixel(unsigned int x, unsigned int y) const access_pixel(unsigned int x, unsigned int y) const
{ {
if (x >= width || y >= height) if (x >= width || y >= height)
return nullptr; return nullptr;
unsigned int const tile_width = (tile_w + 1) * 3; unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w); unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w);
const uint8_t* tile = this->get_tile(tile_index); const pvalue_t* tile = this->get_tile(tile_index);
unsigned int const tile_j = y % tile_h; unsigned int const tile_j = y % tile_h;
unsigned int const tile_i = x % tile_w; unsigned int const tile_i = x % tile_w;
return tile + tile_j * tile_width + (tile_i * 3); return tile + tile_j * tile_width + (tile_i * PIXEL_SIZE);
} }
PackedPixel PackedPixel
@ -367,15 +367,15 @@ struct TiledImage : public Image {
print_tile(unsigned int index) const print_tile(unsigned int index) const
{ {
cout << "Tile[" << index << "]" << endl; cout << "Tile[" << index << "]" << endl;
uint8_t const* tile = this->get_tile(index); pvalue_t const* tile = this->get_tile(index);
unsigned int const tile_width = (tile_w + 1) * 3; unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
for (unsigned int j = 0; j < tile_h + 1; ++j) for (unsigned int j = 0; j < tile_h + 1; ++j)
{ {
for (unsigned int i = 0; i < tile_w + 1; ++i) for (unsigned int i = 0; i < tile_w + 1; ++i)
{ {
if (i != 0) if (i != 0)
cout << ", "; cout << ", ";
uint8_t const* p = tile + j * tile_width + i * 3; pvalue_t const* p = tile + j * tile_width + i * PIXEL_SIZE;
cout << (int) *p << " " << (int) *(p + 1) << " " << (int) *(p + 2); cout << (int) *p << " " << (int) *(p + 1) << " " << (int) *(p + 2);
} }
@ -386,7 +386,7 @@ struct TiledImage : public Image {
void fill_overlap() void fill_overlap()
{ {
unsigned int const tile_width = (W + 1) * 3; unsigned int const tile_width = (W + 1) * PIXEL_SIZE;
for (int j = nb_row_tile - 1; j >= 0; --j) for (int j = nb_row_tile - 1; j >= 0; --j)
for (unsigned int i = 0; i < nb_col_tile; ++i) for (unsigned int i = 0; i < nb_col_tile; ++i)
@ -394,21 +394,21 @@ struct TiledImage : public Image {
// copy last line overlap // copy last line overlap
if (j != (int) nb_row_tile - 1) if (j != (int) nb_row_tile - 1)
{ {
uint8_t const* tile_src = this->access_pixel(i * W, (j + 1) * H); pvalue_t const* tile_src = this->access_pixel(i * W, (j + 1) * H);
uint8_t* tile_dst = this->access_pixel(i * W, j * H); pvalue_t* tile_dst = this->access_pixel(i * W, j * H);
tile_dst += H * tile_width; tile_dst += H * tile_width;
memcpy(tile_dst, tile_src, tile_width * sizeof (uint8_t)); memcpy(tile_dst, tile_src, tile_width * sizeof (pvalue_t));
} }
// copy last col overlap // copy last col overlap
if (i != nb_col_tile - 1) if (i != nb_col_tile - 1)
{ {
uint8_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile); pvalue_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile);
uint8_t* tile_dst = this->get_tile(i + j * nb_col_tile); pvalue_t* tile_dst = this->get_tile(i + j * nb_col_tile);
tile_dst += W * 3; tile_dst += W * PIXEL_SIZE;
for (unsigned int y = 0; y < H; ++y) for (unsigned int y = 0; y < H; ++y)
{ {
memcpy(tile_dst, tile_src, 3 * sizeof (uint8_t)); memcpy(tile_dst, tile_src, PIXEL_SIZE * sizeof (pvalue_t));
tile_src += tile_width; tile_src += tile_width;
tile_dst += tile_width; tile_dst += tile_width;
} }
@ -447,8 +447,8 @@ struct TiledImage : public Image {
++nb_row_tile; ++nb_row_tile;
unsigned int const nb_tiles = nb_col_tile * nb_row_tile; unsigned int const nb_tiles = nb_col_tile * nb_row_tile;
tiles = new uint8_t[nb_tiles * tile_size * 3]; tiles = new pvalue_t[nb_tiles * tile_size * PIXEL_SIZE];
memset(tiles, 0, nb_tiles * tile_size * 3 * sizeof (uint8_t)); memset(tiles, 0, nb_tiles * tile_size * PIXEL_SIZE * sizeof (pvalue_t));
} }
virtual bool read_body(std::ifstream& istr) override virtual bool read_body(std::ifstream& istr) override
@ -459,10 +459,11 @@ struct TiledImage : public Image {
for (unsigned int j = 0; j < height; ++j) for (unsigned int j = 0; j < height; ++j)
for (unsigned int i = 0; i < width; ++i) for (unsigned int i = 0; i < width; ++i)
{ {
uint8_t* tile = this->access_pixel(i, j); pvalue_t* tile = this->access_pixel(i, j);
*(tile++) = istr.get(); *(tile++) = istr.get();
*(tile++) = istr.get(); *(tile++) = istr.get();
*(tile++) = istr.get(); *(tile++) = istr.get();
*(tile++) = 0; // padding
} }
this->fill_overlap(); this->fill_overlap();
@ -475,10 +476,11 @@ struct TiledImage : public Image {
for (unsigned int j = 0; j < height; ++j) for (unsigned int j = 0; j < height; ++j)
for (unsigned int i = 0; i < width; ++i) for (unsigned int i = 0; i < width; ++i)
{ {
uint8_t const* tile = this->access_pixel(i, j); pvalue_t const* tile = this->access_pixel(i, j);
ostr << (char) *(tile++); ostr << (char) *(tile++);
ostr << (char) *(tile++); ostr << (char) *(tile++);
ostr << (char) *(tile++); ostr << (char) *(tile++);
tile++; // padding
} }
return true; return true;
@ -769,17 +771,16 @@ Image* rotate(Image const& src, double angle)
template<unsigned int W, unsigned int H> template<unsigned int W, unsigned int H>
void rotate_pixel(TiledImage<W, H> const& src, void rotate_pixel(TiledImage<W, H> const& src,
Point const& src_rotated_point, Point const& src_rotated_point,
uint8_t* rot_tile) pvalue_t* rot_tile)
{ {
unsigned int const quantize = 8; unsigned int const quantize = 8;
int const src_x = src_rotated_point.x >> 3; int const src_x = src_rotated_point.x >> 3;
int const src_y = src_rotated_point.y >> 3; int const src_y = src_rotated_point.y >> 3;
uint8_t const* src_index_1 = src.access_pixel(src_x, src_y); pvalue_t const* src_index_1 = src.access_pixel(src_x, src_y);
uint8_t const* src_index_2 = src_index_1 + 3; pvalue_t const* src_index_3 = src_index_1 + (W + 1) * PIXEL_SIZE;
uint8_t const* src_index_3 = src_index_1 + (W + 1) * 3; pvalue_t const* src_index_4 = src_index_3 + PIXEL_SIZE;
uint8_t const* src_index_4 = src_index_3 + 3;
// FIXME: deal with image border // FIXME: deal with image border
if (!src_index_4) if (!src_index_4)
@ -790,13 +791,41 @@ void rotate_pixel(TiledImage<W, H> const& src,
unsigned int const inv_x = quantize - x_delta; unsigned int const inv_x = quantize - x_delta;
unsigned int const inv_y = quantize - y_delta; unsigned int const inv_y = quantize - y_delta;
// No SIMD #ifndef SIMD
pvalue_t const* src_index_2 = src_index_1 + PIXEL_SIZE;
rot_tile[0] = ((src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y rot_tile[0] = ((src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y
+ (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta) >> 6; + (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta) >> 6;
rot_tile[1] = ((src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y rot_tile[1] = ((src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y
+ (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta) >> 6; + (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta) >> 6;
rot_tile[2] = ((src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y rot_tile[2] = ((src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y
+ (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta) >> 6; + (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta) >> 6;
#else
// X-axis
__m128i top = _mm_loadu_si128((__m128i*) src_index_1);
__m128i bottom = _mm_loadu_si128((__m128i*) src_index_3);
__m128i coef = _mm_set_epi16(x_delta, x_delta, x_delta, x_delta, inv_x, inv_x, inv_x, inv_x);
top = _mm_mullo_epi16(top, coef);
bottom = _mm_mullo_epi16(bottom, coef);
// Y-axis
coef = _mm_set1_epi16(inv_y);
top = _mm_mullo_epi16(top, coef);
coef = _mm_set1_epi16(y_delta);
bottom = _mm_mullo_epi16(bottom, coef);
top = _mm_add_epi16(top, bottom);
top = _mm_srli_epi16(top, 6);
rot_tile[0] = _mm_extract_epi16(top, 0) + _mm_extract_epi16(top, 4);
rot_tile[1] = _mm_extract_epi16(top, 1) + _mm_extract_epi16(top, 5);
rot_tile[2] = _mm_extract_epi16(top, 2) + _mm_extract_epi16(top, 6);
#endif // ! SIMD
} }
template<unsigned int W, unsigned int H> template<unsigned int W, unsigned int H>
@ -834,7 +863,7 @@ rotate(TiledImage<W, H> const& src, double angle)
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x) for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
{ {
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x; unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
uint8_t* runner = rotated->get_tile(rot_tile_index); pvalue_t* runner = rotated->get_tile(rot_tile_index);
for (unsigned int j = 0; j < H; ++j) for (unsigned int j = 0; j < H; ++j)
{ {
@ -854,11 +883,11 @@ rotate(TiledImage<W, H> const& src, double angle)
rotate_pixel(src, src_runner, runner); rotate_pixel(src, src_runner, runner);
} }
runner += 3; runner += PIXEL_SIZE;
} }
// Jump overlapping pixel // Jump overlapping pixel
runner += 3; runner += PIXEL_SIZE;
} }
} }
} }
@ -1049,6 +1078,8 @@ int main(int argc, char* argv[])
} }
double const step = 15; double const step = 15;
bool save_output_img = false;
bool print_each_run = false;
// No tile // No tile
Image img(argv[1]); Image img(argv[1]);
@ -1063,9 +1094,12 @@ int main(int argc, char* argv[])
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before); auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
average += duration_ms.count(); average += duration_ms.count();
//cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl; if (print_each_run)
cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (save_output_img)
rotated->save(get_save_path("rotated", rotation));
//rotated->save(get_save_path("rotated", rotation));
delete rotated; delete rotated;
++i; ++i;
} }
@ -1073,7 +1107,7 @@ int main(int argc, char* argv[])
cout << " average: " << average / i << "ms" << endl << endl; cout << " average: " << average / i << "ms" << endl << endl;
// Tile // Tile
TiledImage<16, 16> tiled_img(argv[1]); TiledImage<32, 32> tiled_img(argv[1]);
average = 0.0; average = 0.0;
i = 0; i = 0;
cout << "Tiled image" << endl; cout << "Tiled image" << endl;
@ -1085,9 +1119,12 @@ int main(int argc, char* argv[])
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before); auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
average += duration_ms.count(); average += duration_ms.count();
//cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl; if (print_each_run)
cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (save_output_img)
rotated->save(get_save_path("rotated_tiled", rotation));
//rotated->save(get_save_path("rotated_tiled", rotation));
delete rotated; delete rotated;
++i; ++i;
} }