diff --git a/prodScreen_tests.c b/prodScreen_tests.c index 382dfbc..ef59b9c 100644 --- a/prodScreen_tests.c +++ b/prodScreen_tests.c @@ -158,6 +158,12 @@ void fbtft_transpose_neon(uint16_t* src, uint16_t* dst, int w, int h){ for (y=0; yw/2, h = image_rgb_16b->h; SDL_Surface *image_rgb_16b_notsquare = SDL_CreateRGBSurface(SDL_SWSURFACE, w, h, 16, 0,0,0,0); @@ -1008,6 +1251,7 @@ int h = image_rgb_16b->h, w = image_rgb_16b->w; uint16_t * p = (uint16_t *)image_rgb_16b->pixels; SDL_Surface *image_rgb_16b_transposed = SDL_CreateRGBSurface(SDL_SWSURFACE, h, w, 16, 0,0,0,0); uint16_t * p2 = (uint16_t *)image_rgb_16b_transposed->pixels; + int i; uint32_t now = SDL_GetTicks(); @@ -1068,6 +1312,65 @@ now = SDL_GetTicks(); +/****************************************** 4 bis (optims neon) ********************************/ +#if 1 +//* Vars */ +int h = image_rgb_16b->h, w = image_rgb_16b->w; +uint16_t * p = (uint16_t *)image_rgb_16b->pixels; +SDL_Surface *image_rgb_16b_transposed = SDL_CreateRGBSurface(SDL_SWSURFACE, h, w, 16, 0,0,0,0); +uint16_t * p2 = (uint16_t *)image_rgb_16b_transposed->pixels; + +int i; +uint32_t now = SDL_GetTicks(); + + +/* Saved perfs for 10000 iterations: */ +/* Rotate square optimized with memcpy: 2381ms +* Rotate square optimized exported: 9847ms +* Translate soft: 8645ms +* Translate soft a la mano 4x4: 11110ms +* Translate neon: 7800ms +*/ + + + +#define ITERATIONS 3000 +printf("\n"); + +#ifdef __ARM_FP + +/* Rotate 270 with prefetch neon */ +for (i=0; i