Decompose complex lines.

Introduce several const variables for intermediate values to make the code easier to profile.
master
Fabien Freling 2014-07-27 22:40:03 +02:00
parent 4dba86c9f0
commit da4bdf7702
4 changed files with 23 additions and 13 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
*.swp
*.o
rotation
rotated*.ppm
rotated*.pnm
*.png

View File

@ -1,9 +1,8 @@
include Makefile.rules
CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math -g $(CXXFLAGS_PLAFORM)
LFLAGS = #-flto
DEFINES = #-DSIMD
BUILD_DIR = /tmp
BUILD_DIR = .
SRC = rotation.cpp \
image.cpp \
pnm.cpp
@ -19,7 +18,7 @@ all: $(OBJS)
$(CXX) $(CXXFLAGS) $(DEFINES) $< -c -o $@
clean:
@rm -f *~ *.o .*.swp *.ppm *.pgm *.pnm cachegrind.out.*
@rm -f rotation *~ *.o .*.swp *.ppm *.pgm *.pnm cachegrind.out.* callgrind.out.*
run: all
$(BUILD_DIR)/rotation $(IMG)

6
configure vendored
View File

@ -4,12 +4,14 @@ case `uname -s` in
Darwin)
echo 'CXX = clang++' > Makefile.rules
echo 'CXXFLAGS_PLATFORM = ' > Makefile.rules
echo 'CXXFLAGS_PLATFORM = ' >> Makefile.rules
echo 'LFLAGS = -flto' >> Makefile.rules
;;
Linux)
echo 'CXX = g++' > Makefile.rules
echo 'CXXFLAGS_PLATFORM = ' > Makefile.rules
echo 'CXXFLAGS_PLATFORM = ' >> Makefile.rules
echo 'LFLAGS = ' >> Makefile.rules
;;
esac

View File

@ -11,6 +11,7 @@
#include <xmmintrin.h>
#include <emmintrin.h>
#include <tmmintrin.h>
#include "image.h"
@ -177,6 +178,7 @@ void rotate_pixel(Image const& src,
// Bilinear interpolation
unsigned int const src_index_1 = (src_y * src.width + src_x) * src.pixel_size;
unsigned int const src_index_2 = src_index_1 + src.pixel_size;
unsigned int const src_index_3 = src_index_1 + src.pixel_size * src.width;
unsigned int const src_index_4 = src_index_3 + src.pixel_size;
@ -186,6 +188,11 @@ void rotate_pixel(Image const& src,
return;
}
pvalue_t const src_tl = src.buffer[src_index_1];
pvalue_t const src_tr = src.buffer[src_index_2];
pvalue_t const src_bl = src.buffer[src_index_3];
pvalue_t const src_br = src.buffer[src_index_4];
unsigned int const x_delta = (src_rotated_point.x >> (q_pow - q_inter_pow)) & mask;
unsigned int const y_delta = (src_rotated_point.y >> (q_pow - q_inter_pow)) & mask;
unsigned int const inv_x = q_inter - x_delta;
@ -193,10 +200,9 @@ void rotate_pixel(Image const& src,
#ifndef SIMD
unsigned int src_index_2 = src_index_1 + src.pixel_size;
rotate_buffer[rot_index] = ((src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y
+ (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta) >> (q_inter_pow << 1);
pvalue_t interpolated = ((src_tl * inv_x + src_tr * x_delta) * inv_y
+ (src_bl * inv_x + src_br * x_delta) * y_delta) >> (q_inter_pow << 1);
rotate_buffer[rot_index] = interpolated;
// rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
// + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6;
// rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
@ -260,13 +266,16 @@ Image* rotate(Image const& src, double angle)
int64_t const src_qheight = (int64_t) src.height * q_pos;
unsigned int const src_limit = src.width * src.height * src.pixel_size;
size_t const buffer_step = rotated->pixel_size;
int const width = rotated->width;
int const height = rotated->height;
for (int y = 0; y < (int) rotated->height; ++y)
for (int y = 0; y < height; ++y)
{
Point src_rotated_point((rot_origin_in_src.x * q_pos) + y * qdy.x,
(rot_origin_in_src.y * q_pos) + y * qdy.y);
for (unsigned int x = 0; x < rotated->width; ++x)
for (int x = 0; x < width; ++x)
{
if (src_rotated_point.x >= 0 && src_rotated_point.x < src_qwidth
&& src_rotated_point.y >= 0 && src_rotated_point.y < src_qheight)
@ -278,8 +287,7 @@ Image* rotate(Image const& src, double angle)
}
src_rotated_point += qdx;
buffer_index += rotated->pixel_size;
buffer_index += buffer_step;
}
}