From cce4d45ba61d54763a7e0535a9e18c3b8da421a7 Mon Sep 17 00:00:00 2001 From: Fabien Freling Date: Sat, 12 Jul 2014 22:37:15 +0200 Subject: [PATCH] Implement pixels as RGBX structure. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add ‘make debug’ target - Add links in README --- Makefile | 3 ++ README.md | 8 +++++ TODO.md | 22 ++++++++----- rotation.cpp | 93 +++++++++++++++++++++++++++------------------------- 4 files changed, 73 insertions(+), 53 deletions(-) diff --git a/Makefile b/Makefile index 20ec108..e6dfa57 100644 --- a/Makefile +++ b/Makefile @@ -12,5 +12,8 @@ clean: run: all $(BUILD_DIR)/rotation $(IMG) +debug: all + lldb $(BUILD_DIR)/rotation $(IMG) + cachegrind: all valgrind --tool=cachegrind $(BUILD_DIR)/rotation $(IMG) diff --git a/README.md b/README.md index cd04cc7..0690315 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ # RotateMeFast This project aims to rotate bitmap images very quickly (around a millisecond). + +## Links + +* [What Every Programmer Should Know About Memory](http://www.akkadia.org/drepper/cpumemory.pdf) +* [Best Practices for Using vImage](https://developer.apple.com/library/ios/documentation/Performance/Conceptual/vImage/BestPractices/BestPractices.html) + * [vImageRotate_ARGB8888](https://developer.apple.com/library/mac/documentation/Performance/Reference/vImage_geometric/Reference/reference.html#//apple_ref/c/func/vImageRotate_ARGB8888) +* [Vectorising code to take advantage of modern CPUs](http://www.walkingrandomly.com/?p=3378) + * http://locklessinc.com/articles/vectorize/ diff --git a/TODO.md b/TODO.md index 6ac1571..bdb3740 100644 --- a/TODO.md +++ b/TODO.md @@ -1,25 +1,31 @@ -[-] Quaternions -[X] Draw rotated pixels in src order +[-] Draw rotated pixels in src order -> cache write miss [X] Use atan2 at beginning and end of line. Interpolation in-between values [X] Test pixel perfect 90 [ ] Fix out-of-bounds pixel set [ ] Optimization for square images? -[X] Fixed point computation? -[-] -funroll-loops +[X] Fixed point computation +[-] -funroll-loops -> no gain +[-] restrict qualifier -> unavailable in C++ # Cache [-] Rotate per channel -> no gain -[ ] Load pixels in 64-bit variable [X] Cut image in tiles [X] Overlap [-] Rotate in one temp tile then copy/move it [X] Align tiles in memory -[-] Align memory -> no gain -[ ] RGBX format + +## Alignement +[X] RGBX format (create pixel structure) on 8 bytes (can do computation in-place) +[ ] Load pixels in 64-bit variable +[ ] Align memory on 16 bytes + +## Layout +[ ] Pack 4 neighbors in 16B structure (aligned) + Each point is followed by the point below [ ] Spiral layout? # Quality -[X] Interpolate using SIMD, SSE (no big gain) +[X] Interpolate using SIMD, SSE (no big gain, alignement problem?) [ ] Image borders diff --git a/rotation.cpp b/rotation.cpp index d45ad81..294932d 100644 --- a/rotation.cpp +++ b/rotation.cpp @@ -60,6 +60,26 @@ uint8_t interpolate_packed(uint32_t pack, double x, double x_inv, double y, doub +// +// +// Pixel +// + +typedef uint8_t pvalue_t; +struct pixel_t { + pvalue_t r; + pvalue_t g; + pvalue_t b; + pvalue_t x; // padding + + pixel_t() + : r(0), g(0), b(0), x(0) + {} + +}; + + + // // // Image @@ -68,7 +88,7 @@ uint8_t interpolate_packed(uint32_t pack, double x, double x_inv, double y, doub struct Image { unsigned int width; unsigned int height; - uint8_t* buffer; + pixel_t* buffer; Image() : width(0) @@ -85,8 +105,7 @@ struct Image { { this->width = w; this->height = h; - buffer = new uint8_t[width * height * 3]; - memset(buffer, 0, width * height * 3 * sizeof (uint8_t)); + buffer = new pixel_t[width * height]; } Image(string const& path) @@ -128,26 +147,6 @@ struct Image { return true; } - void set_pixel(unsigned int x, unsigned int y, uint8_t r, uint8_t g, uint8_t b) - { - if (x >= width || y >= height) - { -// cerr << __LINE__ << " | Point (" << x << ", " << y << ") out of bounds" << endl; -// cerr << " Image dimensions: " << width << " x " << height << endl; -// assert(false); - return; - } - int index = (y * width + x) * 3; - buffer[index++] = r; - buffer[index++] = g; - buffer[index++] = b; - } - - void set_pixel(Point const& p, uint8_t r, uint8_t g, uint8_t b) - { - this->set_pixel(p.x, p.y, r, g, b); - } - protected: bool read_header(std::ifstream& istr) @@ -218,13 +217,15 @@ struct Image { virtual bool read_body(std::ifstream& istr) { unsigned int const nb_pixels = width * height; - buffer = new uint8_t[nb_pixels * 3]; + buffer = new pixel_t[nb_pixels]; - uint8_t* buf_index = buffer; - for (unsigned int i = 0; i < nb_pixels * 3; ++i) + pixel_t* pixel = buffer; + for (unsigned int i = 0; i < nb_pixels; ++i) { - *buf_index = istr.get(); - ++buf_index; + pixel->r = istr.get(); + pixel->g = istr.get(); + pixel->b = istr.get(); + ++pixel; } return true; @@ -233,11 +234,13 @@ struct Image { virtual bool write_body(std::ofstream& ostr) const { unsigned int const nb_pixels = width * height; - uint8_t* buf_index = buffer; - for (unsigned int i = 0; i < nb_pixels * 3; ++i) + pixel_t* pixel = buffer; + for (unsigned int i = 0; i < nb_pixels; ++i) { - ostr << (char) *buf_index; - ++buf_index; + ostr << (char) pixel->r; + ostr << (char) pixel->g; + ostr << (char) pixel->b; + ++pixel; } return true; @@ -640,20 +643,20 @@ inline void rotate_pixel(Image const& src, Point const& src_rotated_point, unsigned int const src_limit, - uint8_t* rotate_buffer, unsigned int rot_index) + pixel_t* rotate_buffer, unsigned int rot_index) { unsigned int const quantize = 8; int const src_x = src_rotated_point.x >> 3; int const src_y = src_rotated_point.y >> 3; - unsigned int src_index = (src_y * src.width + src_x) * 3; + unsigned int src_index = src_y * src.width + src_x; // Bilinear interpolation unsigned int src_index_1 = src_index; - unsigned int src_index_2 = src_index_1 + 3; - unsigned int src_index_3 = src_index_1 + 3 * src.width; - unsigned int src_index_4 = src_index_3 + 3; + unsigned int src_index_2 = src_index_1 + 1; + unsigned int src_index_3 = src_index_1 + 1 * src.width; + unsigned int src_index_4 = src_index_3 + 1; // Out-of-bounds check if (src_index_4 >= src_limit) @@ -665,12 +668,12 @@ void rotate_pixel(Image const& src, unsigned int const inv_y = quantize - y_delta; // No SIMD - rotate_buffer[rot_index] = ((src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y - + (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta) >> 6; - rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y - + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6; - rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y - + (src.buffer[src_index_3 + 2] * inv_x + src.buffer[src_index_4 + 2] * x_delta) * y_delta) >> 6; + rotate_buffer[rot_index].r = ((src.buffer[src_index_1].r * inv_x + src.buffer[src_index_2].r * x_delta) * inv_y + + (src.buffer[src_index_3].r * inv_x + src.buffer[src_index_4].r * x_delta) * y_delta) >> 6; + rotate_buffer[rot_index].g = ((src.buffer[src_index_1].g * inv_x + src.buffer[src_index_2].g * x_delta) * inv_y + + (src.buffer[src_index_3].g * inv_x + src.buffer[src_index_4].g * x_delta) * y_delta) >> 6; + rotate_buffer[rot_index].b = ((src.buffer[src_index_1].b * inv_x + src.buffer[src_index_2].b * x_delta) * inv_y + + (src.buffer[src_index_3].b * inv_x + src.buffer[src_index_4].b * x_delta) * y_delta) >> 6; } Image* rotate(Image const& src, double angle) @@ -709,7 +712,7 @@ Image* rotate(Image const& src, double angle) DPoint const rot_origin_in_src = convert_img_coord_precision(src, rot_origin_in_src_grid); unsigned int buffer_index = 0; - uint8_t* buffer = rotated->buffer; + pixel_t* buffer = rotated->buffer; unsigned int const quantize = 8; int const& src_qwidth = src.width * quantize; @@ -730,7 +733,7 @@ Image* rotate(Image const& src, double angle) { rotate_pixel(src, src_runner, src_limit, - buffer, buffer_index * 3); + buffer, buffer_index); } ++buffer_index;