Implement pixels as RGBX structure.
- Add ‘make debug’ target - Add links in README
This commit is contained in:
parent
8175b8a06c
commit
cce4d45ba6
3
Makefile
3
Makefile
|
@ -12,5 +12,8 @@ clean:
|
||||||
run: all
|
run: all
|
||||||
$(BUILD_DIR)/rotation $(IMG)
|
$(BUILD_DIR)/rotation $(IMG)
|
||||||
|
|
||||||
|
debug: all
|
||||||
|
lldb $(BUILD_DIR)/rotation $(IMG)
|
||||||
|
|
||||||
cachegrind: all
|
cachegrind: all
|
||||||
valgrind --tool=cachegrind $(BUILD_DIR)/rotation $(IMG)
|
valgrind --tool=cachegrind $(BUILD_DIR)/rotation $(IMG)
|
||||||
|
|
|
@ -1,3 +1,11 @@
|
||||||
# RotateMeFast
|
# RotateMeFast
|
||||||
|
|
||||||
This project aims to rotate bitmap images very quickly (around a millisecond).
|
This project aims to rotate bitmap images very quickly (around a millisecond).
|
||||||
|
|
||||||
|
## Links
|
||||||
|
|
||||||
|
* [What Every Programmer Should Know About Memory](http://www.akkadia.org/drepper/cpumemory.pdf)
|
||||||
|
* [Best Practices for Using vImage](https://developer.apple.com/library/ios/documentation/Performance/Conceptual/vImage/BestPractices/BestPractices.html)
|
||||||
|
* [vImageRotate_ARGB8888](https://developer.apple.com/library/mac/documentation/Performance/Reference/vImage_geometric/Reference/reference.html#//apple_ref/c/func/vImageRotate_ARGB8888)
|
||||||
|
* [Vectorising code to take advantage of modern CPUs](http://www.walkingrandomly.com/?p=3378)
|
||||||
|
* http://locklessinc.com/articles/vectorize/
|
||||||
|
|
22
TODO.md
22
TODO.md
|
@ -1,25 +1,31 @@
|
||||||
[-] Quaternions
|
[-] Draw rotated pixels in src order -> cache write miss
|
||||||
[X] Draw rotated pixels in src order
|
|
||||||
[X] Use atan2 at beginning and end of line.
|
[X] Use atan2 at beginning and end of line.
|
||||||
Interpolation in-between values
|
Interpolation in-between values
|
||||||
[X] Test pixel perfect 90
|
[X] Test pixel perfect 90
|
||||||
[ ] Fix out-of-bounds pixel set
|
[ ] Fix out-of-bounds pixel set
|
||||||
|
|
||||||
[ ] Optimization for square images?
|
[ ] Optimization for square images?
|
||||||
[X] Fixed point computation?
|
[X] Fixed point computation
|
||||||
[-] -funroll-loops
|
[-] -funroll-loops -> no gain
|
||||||
|
[-] restrict qualifier -> unavailable in C++
|
||||||
|
|
||||||
# Cache
|
# Cache
|
||||||
[-] Rotate per channel -> no gain
|
[-] Rotate per channel -> no gain
|
||||||
[ ] Load pixels in 64-bit variable
|
|
||||||
[X] Cut image in tiles
|
[X] Cut image in tiles
|
||||||
[X] Overlap
|
[X] Overlap
|
||||||
[-] Rotate in one temp tile then copy/move it
|
[-] Rotate in one temp tile then copy/move it
|
||||||
[X] Align tiles in memory
|
[X] Align tiles in memory
|
||||||
[-] Align memory -> no gain
|
|
||||||
[ ] RGBX format
|
## Alignment
|
||||||
|
[X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place)
|
||||||
|
[ ] Load pixels in 64-bit variable
|
||||||
|
[ ] Align memory on 16 bytes
|
||||||
|
|
||||||
|
## Layout
|
||||||
|
[ ] Pack 4 neighbors in 16B structure (aligned)
|
||||||
|
Each point is followed by the point below
|
||||||
[ ] Spiral layout?
|
[ ] Spiral layout?
|
||||||
|
|
||||||
# Quality
|
# Quality
|
||||||
[X] Interpolate using SIMD, SSE (no big gain)
|
[X] Interpolate using SIMD, SSE (no big gain, alignment problem?)
|
||||||
[ ] Image borders
|
[ ] Image borders
|
||||||
|
|
93
rotation.cpp
93
rotation.cpp
|
@ -60,6 +60,26 @@ uint8_t interpolate_packed(uint32_t pack, double x, double x_inv, double y, doub
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Pixel
|
||||||
|
//
|
||||||
|
|
||||||
|
typedef uint8_t pvalue_t; // storage type of a single color channel
|
||||||
|
struct pixel_t { // one image pixel stored as four pvalue_t fields: r, g, b and a pad field
|
||||||
|
pvalue_t r;
|
||||||
|
pvalue_t g;
|
||||||
|
pvalue_t b;
|
||||||
|
pvalue_t x; // padding field: holds no color data, only set to 0 by the constructor
|
||||||
|
|
||||||
|
pixel_t() // default constructor zeroes every field, so `new pixel_t[n]` yields an all-zero buffer
|
||||||
|
: r(0), g(0), b(0), x(0)
|
||||||
|
{}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// Image
|
// Image
|
||||||
|
@ -68,7 +88,7 @@ uint8_t interpolate_packed(uint32_t pack, double x, double x_inv, double y, doub
|
||||||
struct Image {
|
struct Image {
|
||||||
unsigned int width;
|
unsigned int width;
|
||||||
unsigned int height;
|
unsigned int height;
|
||||||
uint8_t* buffer;
|
pixel_t* buffer;
|
||||||
|
|
||||||
Image()
|
Image()
|
||||||
: width(0)
|
: width(0)
|
||||||
|
@ -85,8 +105,7 @@ struct Image {
|
||||||
{
|
{
|
||||||
this->width = w;
|
this->width = w;
|
||||||
this->height = h;
|
this->height = h;
|
||||||
buffer = new uint8_t[width * height * 3];
|
buffer = new pixel_t[width * height];
|
||||||
memset(buffer, 0, width * height * 3 * sizeof (uint8_t));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Image(string const& path)
|
Image(string const& path)
|
||||||
|
@ -128,26 +147,6 @@ struct Image {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_pixel(unsigned int x, unsigned int y, uint8_t r, uint8_t g, uint8_t b)
|
|
||||||
{
|
|
||||||
if (x >= width || y >= height)
|
|
||||||
{
|
|
||||||
// cerr << __LINE__ << " | Point (" << x << ", " << y << ") out of bounds" << endl;
|
|
||||||
// cerr << " Image dimensions: " << width << " x " << height << endl;
|
|
||||||
// assert(false);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
int index = (y * width + x) * 3;
|
|
||||||
buffer[index++] = r;
|
|
||||||
buffer[index++] = g;
|
|
||||||
buffer[index++] = b;
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_pixel(Point const& p, uint8_t r, uint8_t g, uint8_t b)
|
|
||||||
{
|
|
||||||
this->set_pixel(p.x, p.y, r, g, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool read_header(std::ifstream& istr)
|
bool read_header(std::ifstream& istr)
|
||||||
|
@ -218,13 +217,15 @@ struct Image {
|
||||||
virtual bool read_body(std::ifstream& istr)
|
virtual bool read_body(std::ifstream& istr)
|
||||||
{
|
{
|
||||||
unsigned int const nb_pixels = width * height;
|
unsigned int const nb_pixels = width * height;
|
||||||
buffer = new uint8_t[nb_pixels * 3];
|
buffer = new pixel_t[nb_pixels];
|
||||||
|
|
||||||
uint8_t* buf_index = buffer;
|
pixel_t* pixel = buffer;
|
||||||
for (unsigned int i = 0; i < nb_pixels * 3; ++i)
|
for (unsigned int i = 0; i < nb_pixels; ++i)
|
||||||
{
|
{
|
||||||
*buf_index = istr.get();
|
pixel->r = istr.get();
|
||||||
++buf_index;
|
pixel->g = istr.get();
|
||||||
|
pixel->b = istr.get();
|
||||||
|
++pixel;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -233,11 +234,13 @@ struct Image {
|
||||||
virtual bool write_body(std::ofstream& ostr) const
|
virtual bool write_body(std::ofstream& ostr) const
|
||||||
{
|
{
|
||||||
unsigned int const nb_pixels = width * height;
|
unsigned int const nb_pixels = width * height;
|
||||||
uint8_t* buf_index = buffer;
|
pixel_t* pixel = buffer;
|
||||||
for (unsigned int i = 0; i < nb_pixels * 3; ++i)
|
for (unsigned int i = 0; i < nb_pixels; ++i)
|
||||||
{
|
{
|
||||||
ostr << (char) *buf_index;
|
ostr << (char) pixel->r;
|
||||||
++buf_index;
|
ostr << (char) pixel->g;
|
||||||
|
ostr << (char) pixel->b;
|
||||||
|
++pixel;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -640,20 +643,20 @@ inline
|
||||||
void rotate_pixel(Image const& src,
|
void rotate_pixel(Image const& src,
|
||||||
Point const& src_rotated_point,
|
Point const& src_rotated_point,
|
||||||
unsigned int const src_limit,
|
unsigned int const src_limit,
|
||||||
uint8_t* rotate_buffer, unsigned int rot_index)
|
pixel_t* rotate_buffer, unsigned int rot_index)
|
||||||
{
|
{
|
||||||
unsigned int const quantize = 8;
|
unsigned int const quantize = 8;
|
||||||
|
|
||||||
int const src_x = src_rotated_point.x >> 3;
|
int const src_x = src_rotated_point.x >> 3;
|
||||||
int const src_y = src_rotated_point.y >> 3;
|
int const src_y = src_rotated_point.y >> 3;
|
||||||
|
|
||||||
unsigned int src_index = (src_y * src.width + src_x) * 3;
|
unsigned int src_index = src_y * src.width + src_x;
|
||||||
|
|
||||||
// Bilinear interpolation
|
// Bilinear interpolation
|
||||||
unsigned int src_index_1 = src_index;
|
unsigned int src_index_1 = src_index;
|
||||||
unsigned int src_index_2 = src_index_1 + 3;
|
unsigned int src_index_2 = src_index_1 + 1;
|
||||||
unsigned int src_index_3 = src_index_1 + 3 * src.width;
|
unsigned int src_index_3 = src_index_1 + 1 * src.width;
|
||||||
unsigned int src_index_4 = src_index_3 + 3;
|
unsigned int src_index_4 = src_index_3 + 1;
|
||||||
|
|
||||||
// Out-of-bounds check
|
// Out-of-bounds check
|
||||||
if (src_index_4 >= src_limit)
|
if (src_index_4 >= src_limit)
|
||||||
|
@ -665,12 +668,12 @@ void rotate_pixel(Image const& src,
|
||||||
unsigned int const inv_y = quantize - y_delta;
|
unsigned int const inv_y = quantize - y_delta;
|
||||||
|
|
||||||
// No SIMD
|
// No SIMD
|
||||||
rotate_buffer[rot_index] = ((src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y
|
rotate_buffer[rot_index].r = ((src.buffer[src_index_1].r * inv_x + src.buffer[src_index_2].r * x_delta) * inv_y
|
||||||
+ (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta) >> 6;
|
+ (src.buffer[src_index_3].r * inv_x + src.buffer[src_index_4].r * x_delta) * y_delta) >> 6;
|
||||||
rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
|
rotate_buffer[rot_index].g = ((src.buffer[src_index_1].g * inv_x + src.buffer[src_index_2].g * x_delta) * inv_y
|
||||||
+ (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6;
|
+ (src.buffer[src_index_3].g * inv_x + src.buffer[src_index_4].g * x_delta) * y_delta) >> 6;
|
||||||
rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
|
rotate_buffer[rot_index].b = ((src.buffer[src_index_1].b * inv_x + src.buffer[src_index_2].b * x_delta) * inv_y
|
||||||
+ (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta) >> 6;
|
+ (src.buffer[src_index_3].b * inv_x + src.buffer[src_index_4].b * x_delta) * y_delta) >> 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
Image* rotate(Image const& src, double angle)
|
Image* rotate(Image const& src, double angle)
|
||||||
|
@ -709,7 +712,7 @@ Image* rotate(Image const& src, double angle)
|
||||||
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
|
DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid);
|
||||||
|
|
||||||
unsigned int buffer_index = 0;
|
unsigned int buffer_index = 0;
|
||||||
uint8_t* buffer = rotated->buffer;
|
pixel_t* buffer = rotated->buffer;
|
||||||
|
|
||||||
unsigned int const quantize = 8;
|
unsigned int const quantize = 8;
|
||||||
int const& src_qwidth = src.width * quantize;
|
int const& src_qwidth = src.width * quantize;
|
||||||
|
@ -730,7 +733,7 @@ Image* rotate(Image const& src, double angle)
|
||||||
{
|
{
|
||||||
rotate_pixel(src, src_runner,
|
rotate_pixel(src, src_runner,
|
||||||
src_limit,
|
src_limit,
|
||||||
buffer, buffer_index * 3);
|
buffer, buffer_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
++buffer_index;
|
++buffer_index;
|
||||||
|
|
Loading…
Reference in a new issue