Remove RGBX structure.
The pixels are still packed as RGBX in memory, but no structure is created anymore; the image is just a contiguous buffer of pixel values. Interpolation is now done with SIMD on integer values. - Add SIMD define.
This commit is contained in:
parent
cce4d45ba6
commit
bcf16680ae
5
Makefile
5
Makefile
|
@ -1,10 +1,11 @@
|
||||||
CXX = clang++
|
CXX = clang++
|
||||||
CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math -Werror -g
|
CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math -g -Werror
|
||||||
|
DEFINES = -DSIMD
|
||||||
BUILD_DIR=/tmp
|
BUILD_DIR=/tmp
|
||||||
IMG=img/lena.ppm
|
IMG=img/lena.ppm
|
||||||
|
|
||||||
all: rotation.cpp
|
all: rotation.cpp
|
||||||
$(CXX) $(CXXFLAGS) $< -o $(BUILD_DIR)/rotation
|
$(CXX) $(CXXFLAGS) $(DEFINES) $< -o $(BUILD_DIR)/rotation
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@rm -f *~ *.o .*.swp *.ppm cachegrind.out.*
|
@rm -f *~ *.o .*.swp *.ppm cachegrind.out.*
|
||||||
|
|
6
TODO.md
6
TODO.md
|
@ -18,8 +18,10 @@
|
||||||
|
|
||||||
## Alignement
|
## Alignement
|
||||||
[X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place)
|
[X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place)
|
||||||
[ ] Load pixels in 64-bit variable
|
[X] Load pixels in 64-bit variable
|
||||||
[ ] Align memory on 16 bytes
|
[X] Directly load in SIMD 128-bit variable
|
||||||
|
[ ] Align memory on 16 bytes (would require padding)
|
||||||
|
[ ] RGBX tiles
|
||||||
|
|
||||||
## Layout
|
## Layout
|
||||||
[ ] Pack 4 neighbors in 16B structure (aligned)
|
[ ] Pack 4 neighbors in 16B structure (aligned)
|
||||||
|
|
98
rotation.cpp
98
rotation.cpp
|
@ -65,18 +65,8 @@ uint8_t interpolate_packed(uint32_t pack, double x, double x_inv, double y, doub
|
||||||
// Pixel
|
// Pixel
|
||||||
//
|
//
|
||||||
|
|
||||||
typedef uint8_t pvalue_t;
|
typedef uint16_t pvalue_t; // pixel value type
|
||||||
struct pixel_t {
|
#define PIXEL_SIZE 4
|
||||||
pvalue_t r;
|
|
||||||
pvalue_t g;
|
|
||||||
pvalue_t b;
|
|
||||||
pvalue_t x; // padding
|
|
||||||
|
|
||||||
pixel_t()
|
|
||||||
: r(0), g(0), b(0), x(0)
|
|
||||||
{}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -88,7 +78,7 @@ struct pixel_t {
|
||||||
struct Image {
|
struct Image {
|
||||||
unsigned int width;
|
unsigned int width;
|
||||||
unsigned int height;
|
unsigned int height;
|
||||||
pixel_t* buffer;
|
pvalue_t* buffer;
|
||||||
|
|
||||||
Image()
|
Image()
|
||||||
: width(0)
|
: width(0)
|
||||||
|
@ -105,7 +95,8 @@ struct Image {
|
||||||
{
|
{
|
||||||
this->width = w;
|
this->width = w;
|
||||||
this->height = h;
|
this->height = h;
|
||||||
buffer = new pixel_t[width * height];
|
buffer = new pvalue_t[width * height * PIXEL_SIZE];
|
||||||
|
memset(buffer, 0, width * height * PIXEL_SIZE * sizeof (pvalue_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
Image(string const& path)
|
Image(string const& path)
|
||||||
|
@ -217,15 +208,15 @@ struct Image {
|
||||||
virtual bool read_body(std::ifstream& istr)
|
virtual bool read_body(std::ifstream& istr)
|
||||||
{
|
{
|
||||||
unsigned int const nb_pixels = width * height;
|
unsigned int const nb_pixels = width * height;
|
||||||
buffer = new pixel_t[nb_pixels];
|
buffer = new pvalue_t[nb_pixels * PIXEL_SIZE];
|
||||||
|
|
||||||
pixel_t* pixel = buffer;
|
pvalue_t* pixel = buffer;
|
||||||
for (unsigned int i = 0; i < nb_pixels; ++i)
|
for (unsigned int i = 0; i < nb_pixels; ++i)
|
||||||
{
|
{
|
||||||
pixel->r = istr.get();
|
*(pixel++) = istr.get();
|
||||||
pixel->g = istr.get();
|
*(pixel++) = istr.get();
|
||||||
pixel->b = istr.get();
|
*(pixel++) = istr.get();
|
||||||
++pixel;
|
*(pixel++) = 0; // padding
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -234,13 +225,13 @@ struct Image {
|
||||||
virtual bool write_body(std::ofstream& ostr) const
|
virtual bool write_body(std::ofstream& ostr) const
|
||||||
{
|
{
|
||||||
unsigned int const nb_pixels = width * height;
|
unsigned int const nb_pixels = width * height;
|
||||||
pixel_t* pixel = buffer;
|
pvalue_t* pixel = buffer;
|
||||||
for (unsigned int i = 0; i < nb_pixels; ++i)
|
for (unsigned int i = 0; i < nb_pixels; ++i)
|
||||||
{
|
{
|
||||||
ostr << (char) pixel->r;
|
ostr << (char) *(pixel++);
|
||||||
ostr << (char) pixel->g;
|
ostr << (char) *(pixel++);
|
||||||
ostr << (char) pixel->b;
|
ostr << (char) *(pixel++);
|
||||||
++pixel;
|
pixel++; // padding
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -643,20 +634,19 @@ inline
|
||||||
void rotate_pixel(Image const& src,
|
void rotate_pixel(Image const& src,
|
||||||
Point const& src_rotated_point,
|
Point const& src_rotated_point,
|
||||||
unsigned int const src_limit,
|
unsigned int const src_limit,
|
||||||
pixel_t* rotate_buffer, unsigned int rot_index)
|
pvalue_t* rotate_buffer, unsigned int rot_index)
|
||||||
{
|
{
|
||||||
unsigned int const quantize = 8;
|
unsigned int const quantize = 8;
|
||||||
|
|
||||||
int const src_x = src_rotated_point.x >> 3;
|
int const src_x = src_rotated_point.x >> 3;
|
||||||
int const src_y = src_rotated_point.y >> 3;
|
int const src_y = src_rotated_point.y >> 3;
|
||||||
|
|
||||||
unsigned int src_index = src_y * src.width + src_x;
|
unsigned int src_index = (src_y * src.width + src_x) * PIXEL_SIZE;
|
||||||
|
|
||||||
// Bilinear interpolation
|
// Bilinear interpolation
|
||||||
unsigned int src_index_1 = src_index;
|
unsigned int src_index_1 = src_index;
|
||||||
unsigned int src_index_2 = src_index_1 + 1;
|
unsigned int src_index_3 = src_index_1 + PIXEL_SIZE * src.width;
|
||||||
unsigned int src_index_3 = src_index_1 + 1 * src.width;
|
unsigned int src_index_4 = src_index_3 + PIXEL_SIZE;
|
||||||
unsigned int src_index_4 = src_index_3 + 1;
|
|
||||||
|
|
||||||
// Out-of-bounds check
|
// Out-of-bounds check
|
||||||
if (src_index_4 >= src_limit)
|
if (src_index_4 >= src_limit)
|
||||||
|
@ -667,13 +657,41 @@ void rotate_pixel(Image const& src,
|
||||||
unsigned int const inv_x = quantize - x_delta;
|
unsigned int const inv_x = quantize - x_delta;
|
||||||
unsigned int const inv_y = quantize - y_delta;
|
unsigned int const inv_y = quantize - y_delta;
|
||||||
|
|
||||||
// No SIMD
|
#ifndef SIMD
|
||||||
rotate_buffer[rot_index].r = ((src.buffer[src_index_1].r * inv_x + src.buffer[src_index_2].r * x_delta) * inv_y
|
|
||||||
+ (src.buffer[src_index_3].r * inv_x + src.buffer[src_index_4].r * x_delta) * y_delta) >> 6;
|
unsigned int src_index_2 = src_index_1 + PIXEL_SIZE;
|
||||||
rotate_buffer[rot_index].g = ((src.buffer[src_index_1].g * inv_x + src.buffer[src_index_2].g * x_delta) * inv_y
|
|
||||||
+ (src.buffer[src_index_3].g * inv_x + src.buffer[src_index_4].g * x_delta) * y_delta) >> 6;
|
rotate_buffer[rot_index] = ((src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y
|
||||||
rotate_buffer[rot_index].b = ((src.buffer[src_index_1].b * inv_x + src.buffer[src_index_2].b * x_delta) * inv_y
|
+ (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta) >> 6;
|
||||||
+ (src.buffer[src_index_3].b * inv_x + src.buffer[src_index_4].b * x_delta) * y_delta) >> 6;
|
rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
|
||||||
|
+ (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6;
|
||||||
|
rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
|
||||||
|
+ (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta) >> 6;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// X-axis
|
||||||
|
__m128i top = _mm_loadu_si128((__m128i*) &src.buffer[src_index_1]);
|
||||||
|
__m128i bottom = _mm_loadu_si128((__m128i*) &src.buffer[src_index_3]);
|
||||||
|
__m128i coef = _mm_set_epi16(x_delta, x_delta, x_delta, x_delta, inv_x, inv_x, inv_x, inv_x);
|
||||||
|
top = _mm_mullo_epi16(top, coef);
|
||||||
|
bottom = _mm_mullo_epi16(bottom, coef);
|
||||||
|
|
||||||
|
// Y-axis
|
||||||
|
coef = _mm_set1_epi16(inv_y);
|
||||||
|
top = _mm_mullo_epi16(top, coef);
|
||||||
|
coef = _mm_set1_epi16(y_delta);
|
||||||
|
bottom = _mm_mullo_epi16(bottom, coef);
|
||||||
|
top = _mm_add_epi16(top, bottom);
|
||||||
|
|
||||||
|
top = _mm_srli_epi16(top, 6);
|
||||||
|
|
||||||
|
rotate_buffer[rot_index] = _mm_extract_epi16(top, 0) + _mm_extract_epi16(top, 4);
|
||||||
|
rotate_buffer[rot_index + 1] = _mm_extract_epi16(top, 1) + _mm_extract_epi16(top, 5);
|
||||||
|
rotate_buffer[rot_index + 2] = _mm_extract_epi16(top, 2) + _mm_extract_epi16(top, 6);
|
||||||
|
|
||||||
|
|
||||||
|
#endif // ! SIMD
|
||||||
}
|
}
|
||||||
|
|
||||||
Image* rotate(Image const& src, double angle)
|
Image* rotate(Image const& src, double angle)
|
||||||
|
@ -706,13 +724,13 @@ Image* rotate(Image const& src, double angle)
|
||||||
round_if_very_small(src_delta_y.x);
|
round_if_very_small(src_delta_y.x);
|
||||||
round_if_very_small(src_delta_y.y);
|
round_if_very_small(src_delta_y.y);
|
||||||
|
|
||||||
unsigned int const src_limit = src.width * src.height * 3;
|
unsigned int const src_limit = src.width * src.height * PIXEL_SIZE;
|
||||||
|
|
||||||
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
|
DPoint const rot_origin_in_src_grid = get_mapped_point(*rotated, Point(0, 0), -rotation);
|
||||||
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
|
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
|
||||||
|
|
||||||
unsigned int buffer_index = 0;
|
unsigned int buffer_index = 0;
|
||||||
pixel_t* buffer = rotated->buffer;
|
pvalue_t* buffer = rotated->buffer;
|
||||||
|
|
||||||
unsigned int const quantize = 8;
|
unsigned int const quantize = 8;
|
||||||
int const& src_qwidth = src.width * quantize;
|
int const& src_qwidth = src.width * quantize;
|
||||||
|
@ -736,7 +754,7 @@ Image* rotate(Image const& src, double angle)
|
||||||
buffer, buffer_index);
|
buffer, buffer_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
++buffer_index;
|
buffer_index += PIXEL_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue