Implement RGBX for tiled images.

Interpolation for tiled images is now also done with SIMD when building
with -DSIMD.
master
Fabien Freling 2014-07-16 20:44:32 +02:00
parent bcf16680ae
commit a992cba5ed
2 changed files with 82 additions and 44 deletions

View File

@ -15,13 +15,14 @@
[X] Overlap
[-] Rotate in one temp tile then copy/move it
[X] Align tiles in memory
[ ] Touch beginning of tile
## Alignment
[X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place)
[X] Load pixels in 64-bit variable
[X] Directly load in SIMD 128-bit variable
[ ] Align memory on 16 bytes (would require padding)
[ ] RGBX tiles
[X] RGBX tiles
## Layout
[ ] Pack 4 neighbors in 16B structure (aligned)
@ -29,5 +30,5 @@
[ ] Spiral layout?
# Quality
[X] Interpolate using SIMD, SSE (no big gain, alignment problem?)
[X] Interpolate using SIMD, SSE
[ ] Image borders

View File

@ -240,7 +240,7 @@ struct Image {
template<unsigned int W, unsigned int H>
struct TiledImage : public Image {
uint8_t* tiles;
pvalue_t* tiles;
unsigned int static const tile_w = W;
unsigned int static const tile_h = H;
@ -290,52 +290,52 @@ struct TiledImage : public Image {
}
}
uint8_t const*
pvalue_t const*
get_tile(unsigned int index) const
{
if (index >= nb_col_tile * nb_row_tile)
return nullptr;
return tiles + index * tile_size * 3;
return tiles + index * tile_size * PIXEL_SIZE;
}
uint8_t*
pvalue_t*
get_tile(unsigned int index)
{
if (index >= nb_col_tile * nb_row_tile)
return nullptr;
return tiles + index * tile_size * 3;
return tiles + index * tile_size * PIXEL_SIZE;
}
uint8_t*
pvalue_t*
access_pixel(unsigned int x, unsigned int y)
{
if (x >= width || y >= height)
return nullptr;
unsigned int const tile_width = (tile_w + 1) * 3;
unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w);
uint8_t* tile = this->get_tile(tile_index);
pvalue_t* tile = this->get_tile(tile_index);
unsigned int const tile_j = y % tile_h;
unsigned int const tile_i = x % tile_w;
return tile + tile_j * tile_width + (tile_i * 3);
return tile + tile_j * tile_width + (tile_i * PIXEL_SIZE);
}
uint8_t const*
pvalue_t const*
access_pixel(unsigned int x, unsigned int y) const
{
if (x >= width || y >= height)
return nullptr;
unsigned int const tile_width = (tile_w + 1) * 3;
unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
unsigned int const tile_index = (y / tile_h) * nb_col_tile + (x / tile_w);
const uint8_t* tile = this->get_tile(tile_index);
const pvalue_t* tile = this->get_tile(tile_index);
unsigned int const tile_j = y % tile_h;
unsigned int const tile_i = x % tile_w;
return tile + tile_j * tile_width + (tile_i * 3);
return tile + tile_j * tile_width + (tile_i * PIXEL_SIZE);
}
PackedPixel
@ -367,15 +367,15 @@ struct TiledImage : public Image {
print_tile(unsigned int index) const
{
cout << "Tile[" << index << "]" << endl;
uint8_t const* tile = this->get_tile(index);
unsigned int const tile_width = (tile_w + 1) * 3;
pvalue_t const* tile = this->get_tile(index);
unsigned int const tile_width = (tile_w + 1) * PIXEL_SIZE;
for (unsigned int j = 0; j < tile_h + 1; ++j)
{
for (unsigned int i = 0; i < tile_w + 1; ++i)
{
if (i != 0)
cout << ", ";
uint8_t const* p = tile + j * tile_width + i * 3;
pvalue_t const* p = tile + j * tile_width + i * PIXEL_SIZE;
cout << (int) *p << " " << (int) *(p + 1) << " " << (int) *(p + 2);
}
@ -386,7 +386,7 @@ struct TiledImage : public Image {
void fill_overlap()
{
unsigned int const tile_width = (W + 1) * 3;
unsigned int const tile_width = (W + 1) * PIXEL_SIZE;
for (int j = nb_row_tile - 1; j >= 0; --j)
for (unsigned int i = 0; i < nb_col_tile; ++i)
@ -394,21 +394,21 @@ struct TiledImage : public Image {
// copy last line overlap
if (j != (int) nb_row_tile - 1)
{
uint8_t const* tile_src = this->access_pixel(i * W, (j + 1) * H);
uint8_t* tile_dst = this->access_pixel(i * W, j * H);
pvalue_t const* tile_src = this->access_pixel(i * W, (j + 1) * H);
pvalue_t* tile_dst = this->access_pixel(i * W, j * H);
tile_dst += H * tile_width;
memcpy(tile_dst, tile_src, tile_width * sizeof (uint8_t));
memcpy(tile_dst, tile_src, tile_width * sizeof (pvalue_t));
}
// copy last col overlap
if (i != nb_col_tile - 1)
{
uint8_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile);
uint8_t* tile_dst = this->get_tile(i + j * nb_col_tile);
tile_dst += W * 3;
pvalue_t* tile_src = this->get_tile(i + 1 + j * nb_col_tile);
pvalue_t* tile_dst = this->get_tile(i + j * nb_col_tile);
tile_dst += W * PIXEL_SIZE;
for (unsigned int y = 0; y < H; ++y)
{
memcpy(tile_dst, tile_src, 3 * sizeof (uint8_t));
memcpy(tile_dst, tile_src, PIXEL_SIZE * sizeof (pvalue_t));
tile_src += tile_width;
tile_dst += tile_width;
}
@ -447,8 +447,8 @@ struct TiledImage : public Image {
++nb_row_tile;
unsigned int const nb_tiles = nb_col_tile * nb_row_tile;
tiles = new uint8_t[nb_tiles * tile_size * 3];
memset(tiles, 0, nb_tiles * tile_size * 3 * sizeof (uint8_t));
tiles = new pvalue_t[nb_tiles * tile_size * PIXEL_SIZE];
memset(tiles, 0, nb_tiles * tile_size * PIXEL_SIZE * sizeof (pvalue_t));
}
virtual bool read_body(std::ifstream& istr) override
@ -459,10 +459,11 @@ struct TiledImage : public Image {
for (unsigned int j = 0; j < height; ++j)
for (unsigned int i = 0; i < width; ++i)
{
uint8_t* tile = this->access_pixel(i, j);
pvalue_t* tile = this->access_pixel(i, j);
*(tile++) = istr.get();
*(tile++) = istr.get();
*(tile++) = istr.get();
*(tile++) = 0; // padding
}
this->fill_overlap();
@ -475,10 +476,11 @@ struct TiledImage : public Image {
for (unsigned int j = 0; j < height; ++j)
for (unsigned int i = 0; i < width; ++i)
{
uint8_t const* tile = this->access_pixel(i, j);
pvalue_t const* tile = this->access_pixel(i, j);
ostr << (char) *(tile++);
ostr << (char) *(tile++);
ostr << (char) *(tile++);
tile++; // padding
}
return true;
@ -769,17 +771,16 @@ Image* rotate(Image const& src, double angle)
template<unsigned int W, unsigned int H>
void rotate_pixel(TiledImage<W, H> const& src,
Point const& src_rotated_point,
uint8_t* rot_tile)
pvalue_t* rot_tile)
{
unsigned int const quantize = 8;
int const src_x = src_rotated_point.x >> 3;
int const src_y = src_rotated_point.y >> 3;
uint8_t const* src_index_1 = src.access_pixel(src_x, src_y);
uint8_t const* src_index_2 = src_index_1 + 3;
uint8_t const* src_index_3 = src_index_1 + (W + 1) * 3;
uint8_t const* src_index_4 = src_index_3 + 3;
pvalue_t const* src_index_1 = src.access_pixel(src_x, src_y);
pvalue_t const* src_index_3 = src_index_1 + (W + 1) * PIXEL_SIZE;
pvalue_t const* src_index_4 = src_index_3 + PIXEL_SIZE;
// FIXME: deal with image border
if (!src_index_4)
@ -790,13 +791,41 @@ void rotate_pixel(TiledImage<W, H> const& src,
unsigned int const inv_x = quantize - x_delta;
unsigned int const inv_y = quantize - y_delta;
// No SIMD
#ifndef SIMD
pvalue_t const* src_index_2 = src_index_1 + PIXEL_SIZE;
rot_tile[0] = ((src_index_1[0] * inv_x + src_index_2[0] * x_delta) * inv_y
+ (src_index_3[0] * inv_x + src_index_4[0] * x_delta) * y_delta) >> 6;
rot_tile[1] = ((src_index_1[1] * inv_x + src_index_2[1] * x_delta) * inv_y
+ (src_index_3[1] * inv_x + src_index_4[1] * x_delta) * y_delta) >> 6;
rot_tile[2] = ((src_index_1[2] * inv_x + src_index_2[2] * x_delta) * inv_y
+ (src_index_3[2] * inv_x + src_index_4[2] * x_delta) * y_delta) >> 6;
#else
// X-axis
__m128i top = _mm_loadu_si128((__m128i*) src_index_1);
__m128i bottom = _mm_loadu_si128((__m128i*) src_index_3);
__m128i coef = _mm_set_epi16(x_delta, x_delta, x_delta, x_delta, inv_x, inv_x, inv_x, inv_x);
top = _mm_mullo_epi16(top, coef);
bottom = _mm_mullo_epi16(bottom, coef);
// Y-axis
coef = _mm_set1_epi16(inv_y);
top = _mm_mullo_epi16(top, coef);
coef = _mm_set1_epi16(y_delta);
bottom = _mm_mullo_epi16(bottom, coef);
top = _mm_add_epi16(top, bottom);
top = _mm_srli_epi16(top, 6);
rot_tile[0] = _mm_extract_epi16(top, 0) + _mm_extract_epi16(top, 4);
rot_tile[1] = _mm_extract_epi16(top, 1) + _mm_extract_epi16(top, 5);
rot_tile[2] = _mm_extract_epi16(top, 2) + _mm_extract_epi16(top, 6);
#endif // ! SIMD
}
template<unsigned int W, unsigned int H>
@ -834,7 +863,7 @@ rotate(TiledImage<W, H> const& src, double angle)
for (unsigned int x = 0; x < rotated->nb_col_tile; ++x)
{
unsigned int const rot_tile_index = y * rotated->nb_col_tile + x;
uint8_t* runner = rotated->get_tile(rot_tile_index);
pvalue_t* runner = rotated->get_tile(rot_tile_index);
for (unsigned int j = 0; j < H; ++j)
{
@ -854,11 +883,11 @@ rotate(TiledImage<W, H> const& src, double angle)
rotate_pixel(src, src_runner, runner);
}
runner += 3;
runner += PIXEL_SIZE;
}
// Jump overlapping pixel
runner += 3;
runner += PIXEL_SIZE;
}
}
}
@ -1049,6 +1078,8 @@ int main(int argc, char* argv[])
}
double const step = 15;
bool save_output_img = false;
bool print_each_run = false;
// No tile
Image img(argv[1]);
@ -1063,9 +1094,12 @@ int main(int argc, char* argv[])
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
average += duration_ms.count();
//cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (print_each_run)
cout << "rotate(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (save_output_img)
rotated->save(get_save_path("rotated", rotation));
//rotated->save(get_save_path("rotated", rotation));
delete rotated;
++i;
}
@ -1073,7 +1107,7 @@ int main(int argc, char* argv[])
cout << " average: " << average / i << "ms" << endl << endl;
// Tile
TiledImage<16, 16> tiled_img(argv[1]);
TiledImage<32, 32> tiled_img(argv[1]);
average = 0.0;
i = 0;
cout << "Tiled image" << endl;
@ -1085,9 +1119,12 @@ int main(int argc, char* argv[])
auto const duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(after - before);
average += duration_ms.count();
//cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (print_each_run)
cout << "rotate tiled(" << rotation << "): " << duration_ms.count() << " ms" << endl;
if (save_output_img)
rotated->save(get_save_path("rotated_tiled", rotation));
//rotated->save(get_save_path("rotated_tiled", rotation));
delete rotated;
++i;
}