Decompose complex lines.
Create a bunch of const variables to make profiling easier.
This commit is contained in:
parent
4dba86c9f0
commit
da4bdf7702
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -3,6 +3,7 @@
|
|||
*.swp
|
||||
*.o
|
||||
|
||||
rotation
|
||||
rotated*.ppm
|
||||
rotated*.pnm
|
||||
*.png
|
||||
|
|
5
Makefile
5
Makefile
|
@ -1,9 +1,8 @@
|
|||
include Makefile.rules
|
||||
|
||||
CXXFLAGS = -std=c++11 -W -Wall -O3 -ffast-math -g $(CXXFLAGS_PLAFORM)
|
||||
LFLAGS = #-flto
|
||||
DEFINES = #-DSIMD
|
||||
BUILD_DIR = /tmp
|
||||
BUILD_DIR = .
|
||||
SRC = rotation.cpp \
|
||||
image.cpp \
|
||||
pnm.cpp
|
||||
|
@ -19,7 +18,7 @@ all: $(OBJS)
|
|||
$(CXX) $(CXXFLAGS) $(DEFINES) $< -c -o $@
|
||||
|
||||
clean:
|
||||
@rm -f *~ *.o .*.swp *.ppm *.pgm *.pnm cachegrind.out.*
|
||||
@rm -f rotation *~ *.o .*.swp *.ppm *.pgm *.pnm cachegrind.out.* callgrind.out.*
|
||||
|
||||
run: all
|
||||
$(BUILD_DIR)/rotation $(IMG)
|
||||
|
|
6
configure
vendored
6
configure
vendored
|
@ -4,12 +4,14 @@ case `uname -s` in
|
|||
|
||||
Darwin)
|
||||
echo 'CXX = clang++' > Makefile.rules
|
||||
echo 'CXXFLAGS_PLATFORM = ' > Makefile.rules
|
||||
echo 'CXXFLAGS_PLATFORM = ' >> Makefile.rules
|
||||
echo 'LFLAGS = -flto' >> Makefile.rules
|
||||
;;
|
||||
|
||||
Linux)
|
||||
echo 'CXX = g++' > Makefile.rules
|
||||
echo 'CXXFLAGS_PLATFORM = ' > Makefile.rules
|
||||
echo 'CXXFLAGS_PLATFORM = ' >> Makefile.rules
|
||||
echo 'LFLAGS = ' >> Makefile.rules
|
||||
;;
|
||||
|
||||
esac
|
||||
|
|
24
rotation.cpp
24
rotation.cpp
|
@ -11,6 +11,7 @@
|
|||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#include "image.h"
|
||||
|
||||
|
@ -177,6 +178,7 @@ void rotate_pixel(Image const& src,
|
|||
|
||||
// Bilinear interpolation
|
||||
unsigned int const src_index_1 = (src_y * src.width + src_x) * src.pixel_size;
|
||||
unsigned int const src_index_2 = src_index_1 + src.pixel_size;
|
||||
unsigned int const src_index_3 = src_index_1 + src.pixel_size * src.width;
|
||||
unsigned int const src_index_4 = src_index_3 + src.pixel_size;
|
||||
|
||||
|
@ -186,6 +188,11 @@ void rotate_pixel(Image const& src,
|
|||
return;
|
||||
}
|
||||
|
||||
pvalue_t const src_tl = src.buffer[src_index_1];
|
||||
pvalue_t const src_tr = src.buffer[src_index_2];
|
||||
pvalue_t const src_bl = src.buffer[src_index_3];
|
||||
pvalue_t const src_br = src.buffer[src_index_4];
|
||||
|
||||
unsigned int const x_delta = (src_rotated_point.x >> (q_pow - q_inter_pow)) & mask;
|
||||
unsigned int const y_delta = (src_rotated_point.y >> (q_pow - q_inter_pow)) & mask;
|
||||
unsigned int const inv_x = q_inter - x_delta;
|
||||
|
@ -193,10 +200,9 @@ void rotate_pixel(Image const& src,
|
|||
|
||||
#ifndef SIMD
|
||||
|
||||
unsigned int src_index_2 = src_index_1 + src.pixel_size;
|
||||
|
||||
rotate_buffer[rot_index] = ((src.buffer[src_index_1] * inv_x + src.buffer[src_index_2] * x_delta) * inv_y
|
||||
+ (src.buffer[src_index_3] * inv_x + src.buffer[src_index_4] * x_delta) * y_delta) >> (q_inter_pow << 1);
|
||||
pvalue_t interpolated = ((src_tl * inv_x + src_tr * x_delta) * inv_y
|
||||
+ (src_bl * inv_x + src_br * x_delta) * y_delta) >> (q_inter_pow << 1);
|
||||
rotate_buffer[rot_index] = interpolated;
|
||||
// rotate_buffer[rot_index + 1] = ((src.buffer[src_index_1 + 1] * inv_x + src.buffer[src_index_2 + 1] * x_delta) * inv_y
|
||||
// + (src.buffer[src_index_3 + 1] * inv_x + src.buffer[src_index_4 + 1] * x_delta) * y_delta) >> 6;
|
||||
// rotate_buffer[rot_index + 2] = ((src.buffer[src_index_1 + 2] * inv_x + src.buffer[src_index_2 + 2] * x_delta) * inv_y
|
||||
|
@ -260,13 +266,16 @@ Image* rotate(Image const& src, double angle)
|
|||
int64_t const src_qheight = (int64_t) src.height * q_pos;
|
||||
|
||||
unsigned int const src_limit = src.width * src.height * src.pixel_size;
|
||||
size_t const buffer_step = rotated->pixel_size;
|
||||
int const width = rotated->width;
|
||||
int const height = rotated->height;
|
||||
|
||||
for (int y = 0; y < (int) rotated->height; ++y)
|
||||
for (int y = 0; y < height; ++y)
|
||||
{
|
||||
Point src_rotated_point((rot_origin_in_src.x * q_pos) + y * qdy.x,
|
||||
(rot_origin_in_src.y * q_pos) + y * qdy.y);
|
||||
|
||||
for (unsigned int x = 0; x < rotated->width; ++x)
|
||||
for (int x = 0; x < width; ++x)
|
||||
{
|
||||
if (src_rotated_point.x >= 0 && src_rotated_point.x < src_qwidth
|
||||
&& src_rotated_point.y >= 0 && src_rotated_point.y < src_qheight)
|
||||
|
@ -278,8 +287,7 @@ Image* rotate(Image const& src, double angle)
|
|||
}
|
||||
|
||||
src_rotated_point += qdx;
|
||||
|
||||
buffer_index += rotated->pixel_size;
|
||||
buffer_index += buffer_step;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue