Implement RGBX for tiled images.
Interpolation for tiled images now also uses SIMD when built with -DSIMD.
This commit is contained in:
parent
bcf16680ae
commit
a992cba5ed
5
TODO.md
5
TODO.md
|
@ -15,13 +15,14 @@
|
||||||
[X] Overlap
|
[X] Overlap
|
||||||
[-] Rotate in one temp tile then copy/move it
|
[-] Rotate in one temp tile then copy/move it
|
||||||
[X] Align tiles in memory
|
[X] Align tiles in memory
|
||||||
|
[ ] Touch beginning of tile
|
||||||
|
|
||||||
## Alignment
|
## Alignment
|
||||||
[X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place)
|
[X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place)
|
||||||
[X] Load pixels in 64-bit variable
|
[X] Load pixels in 64-bit variable
|
||||||
[X] Directly load in SIMD 128-bit variable
|
[X] Directly load in SIMD 128-bit variable
|
||||||
[ ] Align memory on 16 bytes (would require padding)
|
[ ] Align memory on 16 bytes (would require padding)
|
||||||
[ ] RGBX tiles
|
[X] RGBX tiles
|
||||||
|
|
||||||
## Layout
|
## Layout
|
||||||
[ ] Pack 4 neighbors in 16B structure (aligned)
|
[ ] Pack 4 neighbors in 16B structure (aligned)
|
||||||
|
@ -29,5 +30,5 @@
|
||||||
[ ] Spiral layout?
|
[ ] Spiral layout?
|
||||||
|
|
||||||
# Quality
|
# Quality
|
||||||
[X] Interpolate using SIMD, SSE (no big gain, alignment problem?)
|
[X] Interpolate using SIMD, SSE
|
||||||
[ ] Image borders
|
[ ] Image borders
|
||||||
|
|
121
rotation.cpp
121
rotation.cpp
|
@ -240,7 +240,7 @@ struct Image {
|
||||||
|
|
||||||
template<unsigned int W, unsigned int H>
|
template<unsigned int W, unsigned int H>
|
||||||
struct TiledImage : public Image {
|
struct TiledImage : public Image {
|
||||||
uint8_t* tiles;
|
pvalue_t* tiles;
|
||||||
unsigned int static const tile_w = W;
|
unsigned int static const tile_w = W;
|
||||||
unsigned int static const tile_h = H;
|
unsigned int static const tile_h = H;
|
||||||
|
|
||||||
|
@ -290,52 +290,52 @@ struct TiledImage : public Image {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t const*
|
pvalue_t const*
|
||||||
get_tile(unsigned int index) const
|
get_tile(unsigned int index) const
|
||||||
{
|
{
|
||||||
if (index >= nb_col_tile * nb_row_tile)
|
if (index >= nb_col_tile * nb_row_tile)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
return tiles + index * tile_size * 3;
|
return tiles + index * tile_size * PIXEL_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t*
|
pvalue_t*
|
||||||
get_tile(unsigned int index)
|
get_tile(unsigned int index)
|
||||||
{
|
{
|
||||||
if (index >= nb_col_tile * nb_row_tile)
|
if (index >= nb_col_tile * nb_row_tile)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
return tiles + index * tile_size * 3;
|
return tiles + index * tile_size * PIXEL_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t*
|
pvalue_t*
|
||||||
access_pixel(unsigned int x, unsigned int y)
|
access_pixel(unsigned int x, unsigned int y)
|
||||||
{
|
{
|
||||||
if (x >= width || y >= height)
|
if (x >= width || y >= height)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
unsigned int const tile_width = (tile_w + 1) * 3;
|
unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
|
||||||
|
|
||||||
unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w);
|
unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w);
|
||||||
uint8_t* tile = this->get_tile(tile_index);
|
pvalue_t* tile = this->get_tile(tile_index);
|
||||||
unsigned int const tile_j = y % tile_h;
|
unsigned int const tile_j = y % tile_h;
|
||||||
unsigned int const tile_i = x % tile_w;
|
unsigned int const tile_i = x % tile_w;
|
||||||
return tile + tile_j * tile_width + (tile_i * 3);
|
return tile + tile_j * tile_width + (tile_i * PIXEL_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t const*
|
pvalue_t const*
|
||||||
access_pixel(unsigned int x, unsigned int y) const
|
access_pixel(unsigned int x, unsigned int y) const
|
||||||
{
|
{
|
||||||
if (x >= width || y >= height)
|
if (x >= width || y >= height)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
unsigned int const tile_width = (tile_w + 1) * 3;
|
unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
|
||||||
|
|
||||||
unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w);
|
unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w);
|
||||||
const uint8_t* tile = this->get_tile(tile_index);
|
const pvalue_t* tile = this->get_tile(tile_index);
|
||||||
unsigned int const tile_j = y % tile_h;
|
unsigned int const tile_j = y % tile_h;
|
||||||
unsigned int const tile_i = x % tile_w;
|
unsigned int const tile_i = x % tile_w;
|
||||||
return tile + tile_j * tile_width + (tile_i * 3);
|
return tile + tile_j * tile_width + (tile_i * PIXEL_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
PackedPixel
|
PackedPixel
|
||||||
|
@ -367,15 +367,15 @@ struct TiledImage : public Image {
|
||||||
print_tile(unsigned int index) const
|
print_tile(unsigned int index) const
|
||||||
{
|
{
|
||||||
cout << "Tile[" << index << "]" << endl;
|
cout << "Tile[" << index << "]" << endl;
|
||||||
uint8_t const* tile = this->get_tile(index);
|
pvalue_t const* tile = this->get_tile(index);
|
||||||
unsigned int const tile_width = (tile_w + 1) * 3;
|
unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
|
||||||
for (unsigned int j = 0; j < tile_h + 1; ++j)
|
for (unsigned int j = 0; j < tile_h + 1; ++j)
|
||||||
{
|
{
|
||||||
for (unsigned int i = 0; i < tile_w + 1; ++i)
|
for (unsigned int i = 0; i < tile_w + 1; ++i)
|
||||||
{
|
{
|
||||||
if (i != 0)
|
if (i != 0)
|
||||||
cout << ", ";
|
cout << ", ";
|
||||||
uint8_t const* p = tile + j * tile_width + i * 3;
|
pvalue_t const* p = tile + j * tile_width + i * PIXEL_SIZE;
|
||||||
cout << (int) *p << " " << (int) *(p + 1) << " " << (int) *(p + 2);
|
cout << (int) *p << " " << (int) *(p + 1) << " " << (int) *(p + 2);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -386,7 +386,7 @@ struct TiledImage : public Image {
|
||||||
|
|
||||||
void fill_overlap()
|
void fill_overlap()
|
||||||
{
|
{
|
||||||
unsigned int const tile_width = (W + 1) * 3;
|
unsigned int const tile_width = (W + 1) * PIXEL_SIZE;
|
||||||
|
|
||||||
for (int j = nb_row_tile - 1; j >= 0; --j)
|
for (int j = nb_row_tile - 1; j >= 0; --j)
|
||||||
for (unsigned int i = 0; i < nb_col_tile; ++i)
|
for (unsigned int i = 0; i < nb_col_tile; ++i)
|
||||||
|
@ -394,21 +394,21 @@ struct TiledImage : public Image {
|
||||||
// copy last line overlap
|
// copy last line overlap
|
||||||
if (j != (int) nb_row_tile - 1)
|
if (j != (int) nb_row_tile - 1)
|
||||||
{
|
{
|
||||||
uint8_t const* tile_src = this->access_pixel(i * W, (j + 1) * H);
|
pvalue_t const* tile_src = this->access_pixel(i * W, (j + 1) * H);
|
||||||
uint8_t* tile_dst = this->access_pixel(i * W, j * H);
|
pvalue_t* tile_dst = this->access_pixel(i * W, j * H);
|
||||||
tile_dst += H * tile_width;
|
tile_dst += H * tile_width;
|
||||||
memcpy(tile_dst, tile_src, tile_width * sizeof (uint8_t));
|
memcpy(tile_dst, tile_src, tile_width * sizeof (pvalue_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy last col overlap
|
// copy last col overlap
|
||||||
if (i != nb_col_tile - 1)
|
if (i != nb_col_tile - 1)
|
||||||
{
|
{
|
||||||
uint8_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile);
|
pvalue_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile);
|
||||||
uint8_t* tile_dst = this->get_tile(i + j * nb_col_tile);
|
pvalue_t* tile_dst = this->get_tile(i + j * nb_col_tile);
|
||||||
tile_dst += W * 3;
|
tile_dst += W * PIXEL_SIZE;
|
||||||
for (unsigned int y = 0; y < H; ++y)
|
for (unsigned int y = 0; y < H; ++y)
|
||||||
{
|
{
|
||||||
memcpy(tile_dst, tile_src, 3 * sizeof (uint8_t));
|
memcpy(tile_dst, tile_src, PIXEL_SIZE * sizeof (pvalue_t));
|
||||||
tile_src += tile_width;
|
tile_src += tile_width;
|
||||||
tile_dst += tile_width;
|
tile_dst += tile_width;
|
||||||
}
|
}
|
||||||
|
@ -447,8 +447,8 @@ struct TiledImage : public Image {
|
||||||
++nb_row_tile;
|
++nb_row_tile;
|
||||||
|
|
||||||
unsigned int const nb_tiles = nb_col_tile * nb_row_tile;
|
unsigned int const nb_tiles = nb_col_tile * nb_row_tile;
|
||||||
tiles = new uint8_t[nb_tiles * tile_size * 3];
|
tiles = new pvalue_t[nb_tiles * tile_size * PIXEL_SIZE];
|
||||||
memset(tiles, 0, nb_tiles * tile_size * 3 * sizeof (uint8_t));
|
memset(tiles, 0, nb_tiles * tile_size * PIXEL_SIZE * sizeof (pvalue_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool read_body(std::ifstream& istr) override
|
virtual bool read_body(std::ifstream& istr) override
|
||||||
|
@ -459,10 +459,11 @@ struct TiledImage : public Image {
|
||||||
for (unsigned int j = 0; j < height; ++j)
|
for (unsigned int j = 0; j < height; ++j)
|
||||||
for (unsigned int i = 0; i < width; ++i)
|
for (unsigned int i = 0; i < width; ++i)
|
||||||
{
|
{
|
||||||
uint8_t* tile = this->access_pixel(i, j);
|
pvalue_t* tile = this->access_pixel(i, j);
|
||||||
*(tile++) = istr.get();
|
*(tile++) = istr.get();
|
||||||
*(tile++) = istr.get();
|
*(tile++) = istr.get();
|
||||||
*(tile++) = istr.get();
|
*(tile++) = istr.get();
|
||||||
|
*(tile++) = 0; // padding
|
||||||
}
|
}
|
||||||
|
|
||||||
this->fill_overlap();
|
this->fill_overlap();
|
||||||
|
@ -475,10 +476,11 @@ struct TiledImage : public Image {
|
||||||
for (unsigned int j = 0; j < height; ++j)
|
for (unsigned int j = 0; j < height; ++j)
|
||||||
for (unsigned int i = 0; i < width; ++i)
|
for (unsigned int i = 0; i < width; ++i)
|
||||||
{
|
{
|
||||||
uint8_t const* tile = this->access_pixel(i, j);
|
pvalue_t const* tile = this->access_pixel(i, j);
|
||||||
ostr << (char) *(tile++);
|
ostr << (char) *(tile++);
|
||||||
ostr << (char) *(tile++);
|
ostr << (char) *(tile++);
|
||||||
ostr << (char) *(tile++);
|
ostr << (char) *(tile++);
|
||||||
|
tile++; // padding
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -769,17 +771,16 @@ Image* rotate(Image const& src, double angle)
|
||||||
template<unsigned int W, unsigned int H>
|
template<unsigned int W, unsigned int H>
|
||||||
void rotate_pixel(TiledImage<W, H> const& src,
|
void rotate_pixel(TiledImage<W, H> const& src,
|
||||||
Point const& src_rotated_point,
|
Point const& src_rotated_point,
|
||||||
uint8_t* rot_tile)
|
pvalue_t* rot_tile)
|
||||||
{
|
{
|
||||||
unsigned int const quantize = 8;
|
unsigned int const quantize = 8;
|
||||||
|
|
||||||
int const src_x = src_rotated_point.x >> 3;
|
int const src_x = src_rotated_point.x >> 3;
|
||||||
int const src_y = src_rotated_point.y >> 3;
|
int const src_y = src_rotated_point.y >> 3;
|
||||||
|
|
||||||
uint8_t const* src_index_1 = src.access_pixel(src_x, src_y);
|
pvalue_t const* src_index_1 = src.access_pixel(src_x, src_y);
|
||||||
uint8_t const* src_index_2 = src_index_1 + 3;
|
pvalue_t const* src_index_3 = src_index_1 + (W + 1) * PIXEL_SIZE;
|
||||||
uint8_t const* src_index_3 = src_index_1 + (W + 1) * 3;
|
pvalue_t const* src_index_4 = src_index_3 + PIXEL_SIZE;
|
||||||
uint8_t const* src_index_4 = src_index_3 + 3;
|
|
||||||
|
|
||||||
// FIXME: deal with image border
|
// FIXME: deal with image border
|
||||||
if (!src_index_4)
|
if (!src_index_4)
|
||||||
|
@ -790,13 +791,41 @@ void rotate_pixel(TiledImage<W, H> const& src,
|
||||||
unsigned int const inv_x = quantize - x_delta;
|
unsigned int const inv_x = quantize - x_delta;
|
||||||
unsigned int const inv_y = quantize - y_delta;
|
unsigned int const inv_y = quantize - y_delta;
|
||||||
|
|
||||||
// No SIMD
|
#ifndef SIMD
|
||||||
|
|
||||||
|
pvalue_t const* src_index_2 = src_index_1 + PIXEL_SIZE;
|
||||||
|
|
||||||
rot_tile[0] = ((src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y
|
rot_tile[0] = ((src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y
|
||||||
+ (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta) >> 6;
|
+ (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta) >> 6;
|
||||||
rot_tile[1] = ((src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y
|
rot_tile[1] = ((src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y
|
||||||
+ (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta) >> 6;
|
+ (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta) >> 6;
|
||||||
rot_tile[2] = ((src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y
|
rot_tile[2] = ((src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y
|
||||||
+ (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta) >> 6;
|
+ (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta) >> 6;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// X-axis
|
||||||
|
__m128i top = _mm_loadu_si128((__m128i*) src_index_1);
|
||||||
|
__m128i bottom = _mm_loadu_si128((__m128i*) src_index_3);
|
||||||
|
__m128i coef = _mm_set_epi16(x_delta, x_delta, x_delta, x_delta, inv_x, inv_x, inv_x, inv_x);
|
||||||
|
top = _mm_mullo_epi16(top, coef);
|
||||||
|
bottom = _mm_mullo_epi16(bottom, coef);
|
||||||
|
|
||||||
|
// Y-axis
|
||||||
|
coef = _mm_set1_epi16(inv_y);
|
||||||
|
top = _mm_mullo_epi16(top, coef);
|
||||||
|
coef = _mm_set1_epi16(y_delta);
|
||||||
|
bottom = _mm_mullo_epi16(bottom, coef);
|
||||||
|
top = _mm_add_epi16(top, bottom);
|
||||||
|
|
||||||
|
top = _mm_srli_epi16(top, 6);
|
||||||
|
|
||||||
|
rot_tile[0] = _mm_extract_epi16(top, 0) + _mm_extract_epi16(top, 4);
|
||||||
|
rot_tile[1] = _mm_extract_epi16(top, 1) + _mm_extract_epi16(top, 5);
|
||||||
|
rot_tile[2] = _mm_extract_epi16(top, 2) + _mm_extract_epi16(top, 6);
|
||||||
|
|
||||||
|
|
||||||
|
#endif // ! SIMD
|
||||||
}
|
}
|
||||||
|
|
||||||
template<unsigned int W, unsigned int H>
|
template<unsigned int W, unsigned int H>
|
||||||
|
@ -834,7 +863,7 @@ rotate(TiledImage<W, H> const& src, double angle)
|
||||||
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
|
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
|
||||||
{
|
{
|
||||||
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
|
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
|
||||||
uint8_t* runner = rotated->get_tile(rot_tile_index);
|
pvalue_t* runner = rotated->get_tile(rot_tile_index);
|
||||||
|
|
||||||
for (unsigned int j = 0; j < H; ++j)
|
for (unsigned int j = 0; j < H; ++j)
|
||||||
{
|
{
|
||||||
|
@ -854,11 +883,11 @@ rotate(TiledImage<W, H> const& src, double angle)
|
||||||
rotate_pixel(src, src_runner, runner);
|
rotate_pixel(src, src_runner, runner);
|
||||||
}
|
}
|
||||||
|
|
||||||
runner += 3;
|
runner += PIXEL_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Jump overlapping pixel
|
// Jump overlapping pixel
|
||||||
runner += 3;
|
runner += PIXEL_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1049,6 +1078,8 @@ int main(int argc, char* argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
double const step = 15;
|
double const step = 15;
|
||||||
|
bool save_output_img = false;
|
||||||
|
bool print_each_run = false;
|
||||||
|
|
||||||
// No tile
|
// No tile
|
||||||
Image img(argv[1]);
|
Image img(argv[1]);
|
||||||
|
@ -1063,9 +1094,12 @@ int main(int argc, char* argv[])
|
||||||
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
|
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
|
||||||
average += duration_ms.count();
|
average += duration_ms.count();
|
||||||
|
|
||||||
//cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl;
|
if (print_each_run)
|
||||||
|
cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl;
|
||||||
|
|
||||||
|
if (save_output_img)
|
||||||
|
rotated->save(get_save_path("rotated", rotation));
|
||||||
|
|
||||||
//rotated->save(get_save_path("rotated", rotation));
|
|
||||||
delete rotated;
|
delete rotated;
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
@ -1073,7 +1107,7 @@ int main(int argc, char* argv[])
|
||||||
cout << " average: " << average / i << "ms" << endl << endl;
|
cout << " average: " << average / i << "ms" << endl << endl;
|
||||||
|
|
||||||
// Tile
|
// Tile
|
||||||
TiledImage<16, 16> tiled_img(argv[1]);
|
TiledImage<32, 32> tiled_img(argv[1]);
|
||||||
average = 0.0;
|
average = 0.0;
|
||||||
i = 0;
|
i = 0;
|
||||||
cout << "Tiled image" << endl;
|
cout << "Tiled image" << endl;
|
||||||
|
@ -1085,9 +1119,12 @@ int main(int argc, char* argv[])
|
||||||
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
|
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
|
||||||
average += duration_ms.count();
|
average += duration_ms.count();
|
||||||
|
|
||||||
//cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl;
|
if (print_each_run)
|
||||||
|
cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl;
|
||||||
|
|
||||||
|
if (save_output_img)
|
||||||
|
rotated->save(get_save_path("rotated_tiled", rotation));
|
||||||
|
|
||||||
//rotated->save(get_save_path("rotated_tiled", rotation));
|
|
||||||
delete rotated;
|
delete rotated;
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue