kdefx Library API Documentation

kimageeffect.cpp

00001 /* This file is part of the KDE libraries
00002     Copyright (C) 1998, 1999, 2001, 2002 Daniel M. Duley <mosfet@kde.org>
00003     (C) 1998, 1999 Christian Tibirna <ctibirna@total.net>
00004     (C) 1998, 1999 Dirk A. Mueller <mueller@kde.org>
00005     (C) 1999 Geert Jansen <g.t.jansen@stud.tue.nl>
00006     (C) 2000 Josef Weidendorfer <weidendo@in.tum.de>
00007 
00008 Redistribution and use in source and binary forms, with or without
00009 modification, are permitted provided that the following conditions
00010 are met:
00011 
00012 1. Redistributions of source code must retain the above copyright
00013    notice, this list of conditions and the following disclaimer.
00014 2. Redistributions in binary form must reproduce the above copyright
00015    notice, this list of conditions and the following disclaimer in the
00016    documentation and/or other materials provided with the distribution.
00017 
00018 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00019 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00020 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00021 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00022 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00023 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00024 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00025 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00026 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00027 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00028 
00029 */
00030 
00031 // $Id: kimageeffect.cpp,v 1.50.2.2 2004/06/28 14:39:15 lukas Exp $
00032 
00033 #include <math.h>
00034 #include <assert.h>
00035 
00036 #include <qimage.h>
00037 #include <stdlib.h>
00038 #include <iostream>
00039 
00040 #include "kimageeffect.h"
00041 #include "kcpuinfo.h"
00042 
00043 #include <config.h>
00044 
// Select the inline-assembly code paths. They are only enabled for x86
// (__i386__) builds with GCC or the Intel compiler, and only for the
// instruction sets advertised through HAVE_X86_MMX / HAVE_X86_SSE2
// (presumably provided by config.h - TODO confirm).
00045 #if defined(__i386__) && ( defined(__GNUC__) || defined(__INTEL_COMPILER) )
00046 #  if defined( HAVE_X86_MMX )
00047 #    define USE_MMX_INLINE_ASM
00048 #  endif
00049 #  if defined( HAVE_X86_SSE2 )
00050 #    define USE_SSE2_INLINE_ASM
00051 #  endif
00052 #endif
00053 
00054 //======================================================================
00055 //
00056 // Utility stuff for effects ported from ImageMagick to QImage
00057 //
00058 //======================================================================
// Largest value of a colour channel, as a long constant.
00059 #define MaxRGB 255L
// Converts an angle in degrees to radians (uses M_PI).
00060 #define DegreesToRadians(x) ((x)*M_PI/180.0)
// Numerically sqrt(2*pi).
00061 #define MagickSQ2PI 2.50662827463100024161235523934010416269302368164062
// Small tolerance value; its users are not visible in this part of the file.
00062 #define MagickEpsilon  1.0e-12
// pi, spelled out to 50 decimal places.
00063 #define MagickPI  3.14159265358979323846264338327950288419716939937510
00064 
00065 static inline unsigned int intensityValue(unsigned int color)
00066 {
00067     return((unsigned int)((0.299*qRed(color) +
00068                            0.587*qGreen(color) +
00069                            0.1140000000000001*qBlue(color))));
00070 }
00071 
// Releases the malloc()-family block that *memory refers to and resets the
// caller's pointer to NULL. A pointer that is already NULL is left alone;
// passing a NULL `memory` itself trips the assertion.
static inline void liberateMemory(void **memory)
{
    assert(memory != (void **)NULL);

    void *&block = *memory;
    if(block != (void *)NULL) {
        free(block);
        block = (void *)NULL;   // guard against double free / stale use
    }
}
00079 
// One pixel's worth of channel values held as doubles.
struct double_packet
{
    double red, green, blue;   // colour channels
    double alpha;              // alpha channel
};
00087 
// One pixel's worth of channel values held as unsigned shorts.
struct short_packet
{
    unsigned short red, green, blue;   // colour channels
    unsigned short alpha;              // alpha channel
};
00095 
00096 
00097 //======================================================================
00098 //
00099 // Gradient effects
00100 //
00101 //======================================================================
00102 
// Renders a gradient running from colour ca to colour cb into a new
// 32-bit-deep image of the requested size.
//
// size   - dimensions of the image to create; if the width or the height is
//          0 the freshly constructed image is returned without being filled.
// ca, cb - the two end colours of the gradient.
// eff    - gradient shape. Vertical, horizontal, diagonal, cross-diagonal,
//          rectangle, pyramid, pipe-cross and elliptic are handled; for any
//          other value no pixel is written.
// ncols  - when non-zero and QPixmap::defaultDepth() is below 15, the result
//          is handed to dither() together with an ncols-entry palette
//          interpolated linearly between ca and cb (values outside 2..256
//          are replaced by 3).
00103 QImage KImageEffect::gradient(const QSize &size, const QColor &ca,
00104     const QColor &cb, GradientType eff, int ncols)
00105 {
00106     int rDiff, gDiff, bDiff;
00107     int rca, gca, bca, rcb, gcb, bcb;
00108
00109     QImage image(size, 32);
00110
00111     if (size.width() == 0 || size.height() == 0) {
00112 #ifndef NDEBUG
00113       std::cerr << "WARNING: KImageEffect::gradient: invalid image" << std::endl;
00114 #endif
00115       return image;
00116     }
00117
00118     register int x, y;
00119
          // Signed per-channel distance from ca to cb; the individual
          // components of both colours are cached on the way.
00120     rDiff = (rcb = cb.red())   - (rca = ca.red());
00121     gDiff = (gcb = cb.green()) - (gca = ca.green());
00122     bDiff = (bcb = cb.blue())  - (bca = ca.blue());
00123
00124     if( eff == VerticalGradient || eff == HorizontalGradient ){
00125
00126         uint *p;
00127         uint rgb;
00128
              // Running channel values in 16.16 fixed point, starting at ca.
00129         register int rl = rca << 16;
00130         register int gl = gca << 16;
00131         register int bl = bca << 16;
00132
00133         if( eff == VerticalGradient ) {
00134
                  // Fixed-point step per scanline. The integer division is
                  // done before the multiplication, so the step is truncated.
00135             int rcdelta = ((1<<16) / size.height()) * rDiff;
00136             int gcdelta = ((1<<16) / size.height()) * gDiff;
00137             int bcdelta = ((1<<16) / size.height()) * bDiff;
00138
00139             for ( y = 0; y < size.height(); y++ ) {
00140                 p = (uint *) image.scanLine(y);
00141
                      // The step is applied before the row is painted, so
                      // row 0 is already one step away from ca.
00142                 rl += rcdelta;
00143                 gl += gcdelta;
00144                 bl += bcdelta;
00145
00146                 rgb = qRgb( (rl>>16), (gl>>16), (bl>>16) );
00147
                      // Every pixel of the row gets the same colour.
00148                 for( x = 0; x < size.width(); x++ ) {
00149                     *p = rgb;
00150                     p++;
00151                 }
00152             }
00153
00154         }
00155         else {                  // must be HorizontalGradient
00156
                  // Compute the first scanline once; all other rows are
                  // copies of it.
00157             unsigned int *o_src = (unsigned int *)image.scanLine(0);
00158             unsigned int *src = o_src;
00159
                  // Fixed-point step per column (truncated, as above).
00160             int rcdelta = ((1<<16) / size.width()) * rDiff;
00161             int gcdelta = ((1<<16) / size.width()) * gDiff;
00162             int bcdelta = ((1<<16) / size.width()) * bDiff;
00163
00164             for( x = 0; x < size.width(); x++) {
00165
00166                 rl += rcdelta;
00167                 gl += gcdelta;
00168                 bl += bcdelta;
00169
00170                 *src++ = qRgb( (rl>>16), (gl>>16), (bl>>16));
00171             }
00172
                  // Redundant: src is reset again at the top of each row below.
00173             src = o_src;
00174
00175             // Believe it or not, manually copying in a for loop is faster
00176             // than calling memcpy for each scanline (on the order of ms...).
00177             // I think this is due to the function call overhead (mosfet).
00178
00179             for (y = 1; y < size.height(); ++y) {
00180
00181                 p = (unsigned int *)image.scanLine(y);
00182                 src = o_src;
00183                 for(x=0; x < size.width(); ++x)
00184                     *p++ = *src++;
00185             }
00186         }
00187     }
00188
00189     else {
00190
              // Two-dimensional gradients: a per-column and a per-row
              // contribution are precomputed for each colour channel and
              // combined per pixel afterwards.
00191         float rfd, gfd, bfd;
00192         float rd = rca, gd = gca, bd = bca;
00193
00194         unsigned char *xtable[3];
00195         unsigned char *ytable[3];
00196
00197         unsigned int w = size.width(), h = size.height();
00198         xtable[0] = new unsigned char[w];
00199         xtable[1] = new unsigned char[w];
00200         xtable[2] = new unsigned char[w];
00201         ytable[0] = new unsigned char[h];
00202         ytable[1] = new unsigned char[h];
00203         ytable[2] = new unsigned char[h];
              // From here on w and h are twice the image size. They are only
              // used as divisors in the diagonal branch, so that the x ramp
              // and the y ramp each cover half of the colour distance.
00204         w*=2, h*=2;
00205
00206         if ( eff == DiagonalGradient || eff == CrossDiagonalGradient) {
00207             // Diagonal dgradient code inspired by BlackBox (mosfet)
00208             // BlackBox dgradient is (C) Brad Hughes, <bhughes@tcac.net> and
00209             // Mike Cole <mike@mydot.com>.
00210
00211             rfd = (float)rDiff/w;
00212             gfd = (float)gDiff/w;
00213             bfd = (float)bDiff/w;
00214
                  // Column table: starts at ca and advances by half of the
                  // colour distance over the full width. For the
                  // cross-diagonal variant it is filled right to left.
00215             int dir;
00216             for (x = 0; x < size.width(); x++, rd+=rfd, gd+=gfd, bd+=bfd) {
00217                 dir = eff == DiagonalGradient? x : size.width() - x - 1;
00218                 xtable[0][dir] = (unsigned char) rd;
00219                 xtable[1][dir] = (unsigned char) gd;
00220                 xtable[2][dir] = (unsigned char) bd;
00221             }
00222             rfd = (float)rDiff/h;
00223             gfd = (float)gDiff/h;
00224             bfd = (float)bDiff/h;
                  // Row table: starts at 0 and advances by the other half of
                  // the colour distance over the full height.
                  // NOTE(review): when a channel difference is negative these
                  // floats are negative when converted to unsigned char; the
                  // code appears to rely on that conversion wrapping and on
                  // qRgb() discarding the carry of the sum below - confirm.
00225             rd = gd = bd = 0;
00226             for (y = 0; y < size.height(); y++, rd+=rfd, gd+=gfd, bd+=bfd) {
00227                 ytable[0][y] = (unsigned char) rd;
00228                 ytable[1][y] = (unsigned char) gd;
00229                 ytable[2][y] = (unsigned char) bd;
00230             }
00231
                  // Each pixel is the sum of its column and row contribution.
00232             for (y = 0; y < size.height(); y++) {
00233                 unsigned int *scanline = (unsigned int *)image.scanLine(y);
00234                 for (x = 0; x < size.width(); x++) {
00235                     scanline[x] = qRgb(xtable[0][x] + ytable[0][y],
00236                                        xtable[1][x] + ytable[1][y],
00237                                        xtable[2][x] + ytable[2][y]);
00238                 }
00239             }
00240         }
00241
00242         else if (eff == RectangleGradient ||
00243                  eff == PyramidGradient ||
00244                  eff == PipeCrossGradient ||
00245                  eff == EllipticGradient)
00246         {
                  // Direction in which each channel moves from ca to cb.
00247             int rSign = rDiff>0? 1: -1;
00248             int gSign = gDiff>0? 1: -1;
00249             int bSign = bDiff>0? 1: -1;
00250
00251             rfd = (float)rDiff / size.width();
00252             gfd = (float)gDiff / size.width();
00253             bfd = (float)bDiff / size.width();
00254
00255             rd = (float)rDiff/2;
00256             gd = (float)gDiff/2;
00257             bd = (float)bDiff/2;
00258
                  // Column table: the running value goes from +diff/2 down
                  // by diff/width per column; storing its absolute value
                  // gives a V-shaped profile across the width.
00259             for (x = 0; x < size.width(); x++, rd-=rfd, gd-=gfd, bd-=bfd)
00260             {
00261                 xtable[0][x] = (unsigned char) abs((int)rd);
00262                 xtable[1][x] = (unsigned char) abs((int)gd);
00263                 xtable[2][x] = (unsigned char) abs((int)bd);
00264             }
00265
00266             rfd = (float)rDiff/size.height();
00267             gfd = (float)gDiff/size.height();
00268             bfd = (float)bDiff/size.height();
00269
00270             rd = (float)rDiff/2;
00271             gd = (float)gDiff/2;
00272             bd = (float)bDiff/2;
00273
                  // Row table: same V-shaped profile, over the height.
00274             for (y = 0; y < size.height(); y++, rd-=rfd, gd-=gfd, bd-=bfd)
00275             {
00276                 ytable[0][y] = (unsigned char) abs((int)rd);
00277                 ytable[1][y] = (unsigned char) abs((int)gd);
00278                 ytable[2][y] = (unsigned char) abs((int)bd);
00279             }
                  // Only the top-left quadrant is computed (these local h and
                  // w shadow the outer ones and hold the rounded-up half
                  // sizes); each colour is mirrored into the other quadrants.
00280             unsigned int rgb;
00281             int h = (size.height()+1)>>1;
00282             for (y = 0; y < h; y++) {
00283                 unsigned int *sl1 = (unsigned int *)image.scanLine(y);
00284                 unsigned int *sl2 = (unsigned int *)image.scanLine(QMAX(size.height()-y-1, y));
00285
00286                 int w = (size.width()+1)>>1;
00287                 int x2 = size.width()-1;
00288
00289                 for (x = 0; x < w; x++, x2--) {
00290             rgb = 0;
                          // Pyramid: sum of the column and row distances.
00291                     if (eff == PyramidGradient) {
00292                         rgb = qRgb(rcb-rSign*(xtable[0][x]+ytable[0][y]),
00293                                    gcb-gSign*(xtable[1][x]+ytable[1][y]),
00294                                    bcb-bSign*(xtable[2][x]+ytable[2][y]));
00295                     }
                          // Rectangle: the larger of the two distances.
00296                     if (eff == RectangleGradient) {
00297                         rgb = qRgb(rcb - rSign *
00298                                    QMAX(xtable[0][x], ytable[0][y]) * 2,
00299                                    gcb - gSign *
00300                                    QMAX(xtable[1][x], ytable[1][y]) * 2,
00301                                    bcb - bSign *
00302                                    QMAX(xtable[2][x], ytable[2][y]) * 2);
00303                     }
                          // Pipe cross: the smaller of the two distances.
00304                     if (eff == PipeCrossGradient) {
00305                         rgb = qRgb(rcb - rSign *
00306                                    QMIN(xtable[0][x], ytable[0][y]) * 2,
00307                                    gcb - gSign *
00308                                    QMIN(xtable[1][x], ytable[1][y]) * 2,
00309                                    bcb - bSign *
00310                                    QMIN(xtable[2][x], ytable[2][y]) * 2);
00311                     }
                          // Elliptic: sqrt(2*(dx^2 + dy^2)) of the distances.
00312                     if (eff == EllipticGradient) {
00313                         rgb = qRgb(rcb - rSign *
00314                                    (int)sqrt((xtable[0][x]*xtable[0][x] +
00315                                               ytable[0][y]*ytable[0][y])*2.0),
00316                                    gcb - gSign *
00317                                    (int)sqrt((xtable[1][x]*xtable[1][x] +
00318                                               ytable[1][y]*ytable[1][y])*2.0),
00319                                    bcb - bSign *
00320                                    (int)sqrt((xtable[2][x]*xtable[2][x] +
00321                                               ytable[2][y]*ytable[2][y])*2.0));
00322                     }
00323
                          // Write the colour to all four mirrored positions.
00324                     sl1[x] = sl2[x] = rgb;
00325                     sl1[x2] = sl2[x2] = rgb;
00326                 }
00327             }
00328         }
00329
00330         delete [] xtable[0];
00331         delete [] xtable[1];
00332         delete [] xtable[2];
00333         delete [] ytable[0];
00334         delete [] ytable[1];
00335         delete [] ytable[2];
00336     }
00337
00338     // dither if necessary
00339     if (ncols && (QPixmap::defaultDepth() < 15 )) {
          // Out-of-range palette sizes are replaced by 3.
00340     if ( ncols < 2 || ncols > 256 )
00341         ncols = 3;
          // Palette: ncols colours spaced evenly from ca (i = 0) to cb.
00342     QColor *dPal = new QColor[ncols];
00343     for (int i=0; i<ncols; i++) {
00344         dPal[i].setRgb ( rca + rDiff * i / ( ncols - 1 ),
00345                  gca + gDiff * i / ( ncols - 1 ),
00346                  bca + bDiff * i / ( ncols - 1 ) );
00347     }
00348     dither(image, dPal, ncols);
00349     delete [] dPal;
00350     }
00351
00352     return image;
00353 }
00354 
00355 
00356 // -----------------------------------------------------------------------------
00357 
00358 //CT this was (before Dirk A. Mueller's speedup changes)
00359 //   merely the same code as in the above method, but it's supposedly
00360 //   way less performant since it introduces a lot of supplementary tests
00361 //   and simple math operations for the calculus of the balance.
00362 //      (surprisingly, it isn't less performant; on the contrary :-)
00363 //   Yes, I could have merged them, but then the excellent performance of
00364 //   the balanced code would suffer with no other gain than a mere
00365 //   source code and byte code size economy.
00366 
00367 QImage KImageEffect::unbalancedGradient(const QSize &size, const QColor &ca,
00368     const QColor &cb, GradientType eff, int xfactor, int yfactor,
00369     int ncols)
00370 {
00371     int dir; // general parameter used for direction switches
00372 
00373     bool _xanti = false , _yanti = false;
00374 
00375     if (xfactor < 0) _xanti = true; // negative on X direction
00376     if (yfactor < 0) _yanti = true; // negative on Y direction
00377 
00378     xfactor = abs(xfactor);
00379     yfactor = abs(yfactor);
00380 
00381     if (!xfactor) xfactor = 1;
00382     if (!yfactor) yfactor = 1;
00383 
00384     if (xfactor > 200 ) xfactor = 200;
00385     if (yfactor > 200 ) yfactor = 200;
00386 
00387 
00388     //    float xbal = xfactor/5000.;
00389     //    float ybal = yfactor/5000.;
00390     float xbal = xfactor/30./size.width();
00391     float ybal = yfactor/30./size.height();
00392     float rat;
00393 
00394     int rDiff, gDiff, bDiff;
00395     int rca, gca, bca, rcb, gcb, bcb;
00396 
00397     QImage image(size, 32);
00398 
00399     if (size.width() == 0 || size.height() == 0) {
00400 #ifndef NDEBUG
00401       std::cerr << "WARNING: KImageEffect::unbalancedGradient : invalid image\n";
00402 #endif
00403       return image;
00404     }
00405 
00406     register int x, y;
00407     unsigned int *scanline;
00408 
00409     rDiff = (rcb = cb.red())   - (rca = ca.red());
00410     gDiff = (gcb = cb.green()) - (gca = ca.green());
00411     bDiff = (bcb = cb.blue())  - (bca = ca.blue());
00412 
00413     if( eff == VerticalGradient || eff == HorizontalGradient){
00414         QColor cRow;
00415 
00416         uint *p;
00417         uint rgbRow;
00418 
00419     if( eff == VerticalGradient) {
00420       for ( y = 0; y < size.height(); y++ ) {
00421         dir = _yanti ? y : size.height() - 1 - y;
00422             p = (uint *) image.scanLine(dir);
00423             rat =  1 - exp( - (float)y  * ybal );
00424 
00425             cRow.setRgb( rcb - (int) ( rDiff * rat ),
00426                          gcb - (int) ( gDiff * rat ),
00427                          bcb - (int) ( bDiff * rat ) );
00428 
00429             rgbRow = cRow.rgb();
00430 
00431             for( x = 0; x < size.width(); x++ ) {
00432           *p = rgbRow;
00433           p++;
00434             }
00435       }
00436     }
00437     else {
00438 
00439       unsigned int *src = (unsigned int *)image.scanLine(0);
00440       for(x = 0; x < size.width(); x++ )
00441         {
00442           dir = _xanti ? x : size.width() - 1 - x;
00443           rat = 1 - exp( - (float)x  * xbal );
00444 
00445           src[dir] = qRgb(rcb - (int) ( rDiff * rat ),
00446                 gcb - (int) ( gDiff * rat ),
00447                 bcb - (int) ( bDiff * rat ));
00448         }
00449 
00450       // Believe it or not, manually copying in a for loop is faster
00451       // than calling memcpy for each scanline (on the order of ms...).
00452       // I think this is due to the function call overhead (mosfet).
00453 
00454       for(y = 1; y < size.height(); ++y)
00455         {
00456           scanline = (unsigned int *)image.scanLine(y);
00457           for(x=0; x < size.width(); ++x)
00458         scanline[x] = src[x];
00459         }
00460     }
00461     }
00462 
00463     else {
00464       int w=size.width(), h=size.height();
00465 
00466       unsigned char *xtable[3];
00467       unsigned char *ytable[3];
00468       xtable[0] = new unsigned char[w];
00469       xtable[1] = new unsigned char[w];
00470       xtable[2] = new unsigned char[w];
00471       ytable[0] = new unsigned char[h];
00472       ytable[1] = new unsigned char[h];
00473       ytable[2] = new unsigned char[h];
00474 
00475       if ( eff == DiagonalGradient || eff == CrossDiagonalGradient)
00476     {
00477       for (x = 0; x < w; x++) {
00478               dir = _xanti ? x : w - 1 - x;
00479               rat = 1 - exp( - (float)x * xbal );
00480 
00481               xtable[0][dir] = (unsigned char) ( rDiff/2 * rat );
00482               xtable[1][dir] = (unsigned char) ( gDiff/2 * rat );
00483               xtable[2][dir] = (unsigned char) ( bDiff/2 * rat );
00484           }
00485 
00486       for (y = 0; y < h; y++) {
00487               dir = _yanti ? y : h - 1 - y;
00488               rat =  1 - exp( - (float)y  * ybal );
00489 
00490               ytable[0][dir] = (unsigned char) ( rDiff/2 * rat );
00491               ytable[1][dir] = (unsigned char) ( gDiff/2 * rat );
00492               ytable[2][dir] = (unsigned char) ( bDiff/2 * rat );
00493           }
00494 
00495       for (y = 0; y < h; y++) {
00496               unsigned int *scanline = (unsigned int *)image.scanLine(y);
00497               for (x = 0; x < w; x++) {
00498                   scanline[x] = qRgb(rcb - (xtable[0][x] + ytable[0][y]),
00499                                      gcb - (xtable[1][x] + ytable[1][y]),
00500                                      bcb - (xtable[2][x] + ytable[2][y]));
00501               }
00502           }
00503         }
00504 
00505       else if (eff == RectangleGradient ||
00506                eff == PyramidGradient ||
00507                eff == PipeCrossGradient ||
00508                eff == EllipticGradient)
00509       {
00510           int rSign = rDiff>0? 1: -1;
00511           int gSign = gDiff>0? 1: -1;
00512           int bSign = bDiff>0? 1: -1;
00513 
00514           for (x = 0; x < w; x++)
00515         {
00516                 dir = _xanti ? x : w - 1 - x;
00517                 rat =  1 - exp( - (float)x * xbal );
00518 
00519                 xtable[0][dir] = (unsigned char) abs((int)(rDiff*(0.5-rat)));
00520                 xtable[1][dir] = (unsigned char) abs((int)(gDiff*(0.5-rat)));
00521                 xtable[2][dir] = (unsigned char) abs((int)(bDiff*(0.5-rat)));
00522             }
00523 
00524           for (y = 0; y < h; y++)
00525           {
00526               dir = _yanti ? y : h - 1 - y;
00527 
00528               rat =  1 - exp( - (float)y * ybal );
00529 
00530               ytable[0][dir] = (unsigned char) abs((int)(rDiff*(0.5-rat)));
00531               ytable[1][dir] = (unsigned char) abs((int)(gDiff*(0.5-rat)));
00532               ytable[2][dir] = (unsigned char) abs((int)(bDiff*(0.5-rat)));
00533           }
00534 
00535           for (y = 0; y < h; y++) {
00536               unsigned int *scanline = (unsigned int *)image.scanLine(y);
00537               for (x = 0; x < w; x++) {
00538                   if (eff == PyramidGradient)
00539                   {
00540                       scanline[x] = qRgb(rcb-rSign*(xtable[0][x]+ytable[0][y]),
00541                                          gcb-gSign*(xtable[1][x]+ytable[1][y]),
00542                                          bcb-bSign*(xtable[2][x]+ytable[2][y]));
00543                   }
00544                   if (eff == RectangleGradient)
00545                   {
00546                       scanline[x] = qRgb(rcb - rSign *
00547                                          QMAX(xtable[0][x], ytable[0][y]) * 2,
00548                                          gcb - gSign *
00549                                          QMAX(xtable[1][x], ytable[1][y]) * 2,
00550                                          bcb - bSign *
00551                                          QMAX(xtable[2][x], ytable[2][y]) * 2);
00552                   }
00553                   if (eff == PipeCrossGradient)
00554                   {
00555                       scanline[x] = qRgb(rcb - rSign *
00556                                          QMIN(xtable[0][x], ytable[0][y]) * 2,
00557                                          gcb - gSign *
00558                                          QMIN(xtable[1][x], ytable[1][y]) * 2,
00559                                          bcb - bSign *
00560                                          QMIN(xtable[2][x], ytable[2][y]) * 2);
00561                   }
00562                   if (eff == EllipticGradient)
00563                   {
00564                       scanline[x] = qRgb(rcb - rSign *
00565                                          (int)sqrt((xtable[0][x]*xtable[0][x] +
00566                                                     ytable[0][y]*ytable[0][y])*2.0),
00567                                          gcb - gSign *
00568                                          (int)sqrt((xtable[1][x]*xtable[1][x] +
00569                                                     ytable[1][y]*ytable[1][y])*2.0),
00570                                          bcb - bSign *
00571                                          (int)sqrt((xtable[2][x]*xtable[2][x] +
00572                                                     ytable[2][y]*ytable[2][y])*2.0));
00573                   }
00574               }
00575           }
00576       }
00577 
00578       if (ncols && (QPixmap::defaultDepth() < 15 )) {
00579           if ( ncols < 2 || ncols > 256 )
00580               ncols = 3;
00581           QColor *dPal = new QColor[ncols];
00582           for (int i=0; i<ncols; i++) {
00583               dPal[i].setRgb ( rca + rDiff * i / ( ncols - 1 ),
00584                                gca + gDiff * i / ( ncols - 1 ),
00585                                bca + bDiff * i / ( ncols - 1 ) );
00586           }
00587           dither(image, dPal, ncols);
00588           delete [] dPal;
00589       }
00590 
00591       delete [] xtable[0];
00592       delete [] xtable[1];
00593       delete [] xtable[2];
00594       delete [] ytable[0];
00595       delete [] ytable[1];
00596       delete [] ytable[2];
00597 
00598     }
00599 
00600     return image;
00601 }
00602 
00606 namespace {
00607 
00608 struct KIE4Pack
00609 {
00610     Q_UINT16 data[4];
00611 };
00612 
00613 struct KIE8Pack
00614 {
00615     Q_UINT16 data[8];
00616 };
00617 
00618 }
00619 
00620 //======================================================================
00621 //
00622 // Intensity effects
00623 //
00624 //======================================================================
00625 
00626 
00627 /* This builds a 256 byte unsigned char lookup table with all
00628  * the possible percent values prior to applying the effect, then uses
00629  * integer math for the pixels. For any image larger than 9x9 this will be
00630  * less expensive than doing a float operation on the 3 color components of
00631  * each pixel. (mosfet)
00632  */
00633 QImage& KImageEffect::intensity(QImage &image, float percent)
00634 {
00635     if (image.width() == 0 || image.height() == 0) {
00636 #ifndef NDEBUG
00637       std::cerr << "WARNING: KImageEffect::intensity : invalid image\n";
00638 #endif
00639       return image;
00640     }
00641 
00642     int segColors = image.depth() > 8 ? 256 : image.numColors();
00643     int pixels = image.depth() > 8 ? image.width()*image.height() :
00644         image.numColors();
00645     unsigned int *data = image.depth() > 8 ? (unsigned int *)image.bits() :
00646         (unsigned int *)image.colorTable();
00647 
00648     bool brighten = (percent >= 0);
00649     if(percent < 0)
00650         percent = -percent;
00651 
00652 #ifdef USE_MMX_INLINE_ASM
00653     bool haveMMX = KCPUInfo::haveExtension( KCPUInfo::IntelMMX );
00654 
00655     if(haveMMX)
00656     {
00657         Q_UINT16 p = Q_UINT16(256.0f*(percent));
00658         KIE4Pack mult = {{p,p,p,0}};
00659 
00660         __asm__ __volatile__(
00661         "pxor %%mm7, %%mm7\n\t"                // zero mm7 for unpacking
00662         "movq  (%0), %%mm6\n\t"                // copy intensity change to mm6
00663         : : "r"(&mult), "m"(mult));
00664 
00665         unsigned int rem = pixels % 4;
00666         pixels -= rem;
00667         Q_UINT32 *end = ( data + pixels );
00668 
00669         if (brighten)
00670         {
00671             while ( data != end ) {
00672                 __asm__ __volatile__(
00673                 "movq       (%0), %%mm0\n\t"
00674                 "movq      8(%0), %%mm4\n\t"   // copy 4 pixels of data to mm0 and mm4
00675                 "movq      %%mm0, %%mm1\n\t"
00676                 "movq      %%mm0, %%mm3\n\t"
00677                 "movq      %%mm4, %%mm5\n\t"   // copy to registers for unpacking
00678                 "punpcklbw %%mm7, %%mm0\n\t"
00679                 "punpckhbw %%mm7, %%mm1\n\t"   // unpack the two pixels from mm0
00680                 "pmullw    %%mm6, %%mm0\n\t"
00681                 "punpcklbw %%mm7, %%mm4\n\t"
00682                 "pmullw    %%mm6, %%mm1\n\t"   // multiply by intensity*256
00683                 "psrlw        $8, %%mm0\n\t"   // divide by 256
00684                 "pmullw    %%mm6, %%mm4\n\t"
00685                 "psrlw        $8, %%mm1\n\t"
00686                 "psrlw        $8, %%mm4\n\t"
00687                 "packuswb  %%mm1, %%mm0\n\t"   // pack solution into mm0. saturates at 255
00688                 "movq      %%mm5, %%mm1\n\t"
00689 
00690                 "punpckhbw %%mm7, %%mm1\n\t"   // unpack 4th pixel in mm1
00691 
00692                 "pmullw    %%mm6, %%mm1\n\t"
00693                 "paddusb   %%mm3, %%mm0\n\t"   // add intesity result to original of mm0
00694                 "psrlw        $8, %%mm1\n\t"
00695                 "packuswb  %%mm1, %%mm4\n\t"   // pack upper two pixels into mm4
00696 
00697                 "movq      %%mm0, (%0)\n\t"    // rewrite to memory lower two pixels
00698                 "paddusb   %%mm5, %%mm4\n\t"
00699                 "movq      %%mm4, 8(%0)\n\t"   // rewrite upper two pixels
00700                 : : "r"(data) );
00701                 data += 4;
00702             }
00703 
00704             end += rem;
00705             while ( data != end ) {
00706                 __asm__ __volatile__(
00707                 "movd       (%0), %%mm0\n\t"   // repeat above but for
00708                 "punpcklbw %%mm7, %%mm0\n\t"   // one pixel at a time
00709                 "movq      %%mm0, %%mm3\n\t"
00710                 "pmullw    %%mm6, %%mm0\n\t"
00711                 "psrlw        $8, %%mm0\n\t"
00712                 "paddw     %%mm3, %%mm0\n\t"
00713                 "packuswb  %%mm0, %%mm0\n\t"
00714                 "movd      %%mm0, (%0)\n\t"
00715                 : : "r"(data) );
00716         data++;
00717             }
00718         }
00719         else
00720         {
00721             while ( data != end ) {
00722                 __asm__ __volatile__(
00723                 "movq       (%0), %%mm0\n\t"
00724                 "movq      8(%0), %%mm4\n\t"
00725                 "movq      %%mm0, %%mm1\n\t"
00726                 "movq      %%mm0, %%mm3\n\t"
00727 
00728                 "movq      %%mm4, %%mm5\n\t"
00729 
00730                 "punpcklbw %%mm7, %%mm0\n\t"
00731                 "punpckhbw %%mm7, %%mm1\n\t"
00732                 "pmullw    %%mm6, %%mm0\n\t"
00733                 "punpcklbw %%mm7, %%mm4\n\t"
00734                 "pmullw    %%mm6, %%mm1\n\t"
00735                 "psrlw        $8, %%mm0\n\t"
00736                 "pmullw    %%mm6, %%mm4\n\t"
00737                 "psrlw        $8, %%mm1\n\t"
00738                 "psrlw        $8, %%mm4\n\t"
00739                 "packuswb  %%mm1, %%mm0\n\t"
00740                 "movq      %%mm5, %%mm1\n\t"
00741 
00742                 "punpckhbw %%mm7, %%mm1\n\t"
00743 
00744                 "pmullw    %%mm6, %%mm1\n\t"
00745                 "psubusb   %%mm0, %%mm3\n\t"   // subtract darkening amount
00746                 "psrlw        $8, %%mm1\n\t"
00747                 "packuswb  %%mm1, %%mm4\n\t"
00748 
00749                 "movq      %%mm3, (%0)\n\t"
00750                 "psubusb   %%mm4, %%mm5\n\t"   // only change for this version is
00751                 "movq      %%mm5, 8(%0)\n\t"   // subtraction here as we are darkening image
00752                 : : "r"(data) );
00753                 data += 4;
00754             }
00755 
00756             end += rem;
00757             while ( data != end ) {
00758                 __asm__ __volatile__(
00759                 "movd       (%0), %%mm0\n\t"
00760                 "punpcklbw %%mm7, %%mm0\n\t"
00761                 "movq      %%mm0, %%mm3\n\t"
00762                 "pmullw    %%mm6, %%mm0\n\t"
00763                 "psrlw        $8, %%mm0\n\t"
00764                 "psubusw   %%mm0, %%mm3\n\t"
00765                 "packuswb  %%mm3, %%mm3\n\t"
00766                 "movd      %%mm3, (%0)\n\t"
00767                 : : "r"(data) );
00768                 data++;
00769             }
00770         }
00771         __asm__ __volatile__("emms");          // clear mmx state
00772     }
00773     else
00774 #endif // USE_MMX_INLINE_ASM
00775     {
00776         unsigned char *segTbl = new unsigned char[segColors];
00777         int tmp;
00778         if(brighten){ // keep overflow check out of loops
00779             for(int i=0; i < segColors; ++i){
00780                 tmp = (int)(i*percent);
00781                 if(tmp > 255)
00782                     tmp = 255;
00783                 segTbl[i] = tmp;
00784             }
00785         }
00786         else{
00787             for(int i=0; i < segColors; ++i){
00788                 tmp = (int)(i*percent);
00789                 if(tmp < 0)
00790                     tmp = 0;
00791                  segTbl[i] = tmp;
00792             }
00793         }
00794 
00795         if(brighten){ // same here
00796             for(int i=0; i < pixels; ++i){
00797                 int r = qRed(data[i]);
00798                 int g = qGreen(data[i]);
00799                 int b = qBlue(data[i]);
00800                 int a = qAlpha(data[i]);
00801                 r = r + segTbl[r] > 255 ? 255 : r + segTbl[r];
00802                 g = g + segTbl[g] > 255 ? 255 : g + segTbl[g];
00803                 b = b + segTbl[b] > 255 ? 255 : b + segTbl[b];
00804                 data[i] = qRgba(r, g, b,a);
00805             }
00806         }
00807         else{
00808             for(int i=0; i < pixels; ++i){
00809                 int r = qRed(data[i]);
00810                 int g = qGreen(data[i]);
00811                 int b = qBlue(data[i]);
00812                 int a = qAlpha(data[i]);
00813                 r = r - segTbl[r] < 0 ? 0 : r - segTbl[r];
00814                 g = g - segTbl[g] < 0 ? 0 : g - segTbl[g];
00815                 b = b - segTbl[b] < 0 ? 0 : b - segTbl[b];
00816                 data[i] = qRgba(r, g, b, a);
00817             }
00818         }
00819         delete [] segTbl;
00820     }
00821 
00822     return image;
00823 }
00824 
00825 QImage& KImageEffect::channelIntensity(QImage &image, float percent,
00826                                        RGBComponent channel)
00827 {
00828     if (image.width() == 0 || image.height() == 0) {
00829 #ifndef NDEBUG
00830       std::cerr << "WARNING: KImageEffect::channelIntensity : invalid image\n";
00831 #endif
00832       return image;
00833     }
00834 
00835     int segColors = image.depth() > 8 ? 256 : image.numColors();
00836     unsigned char *segTbl = new unsigned char[segColors];
00837     int pixels = image.depth() > 8 ? image.width()*image.height() :
00838         image.numColors();
00839     unsigned int *data = image.depth() > 8 ? (unsigned int *)image.bits() :
00840         (unsigned int *)image.colorTable();
00841     bool brighten = (percent >= 0);
00842     if(percent < 0)
00843         percent = -percent;
00844 
00845     if(brighten){ // keep overflow check out of loops
00846         for(int i=0; i < segColors; ++i){
00847             int tmp = (int)(i*percent);
00848             if(tmp > 255)
00849                 tmp = 255;
00850             segTbl[i] = tmp;
00851         }
00852     }
00853     else{
00854         for(int i=0; i < segColors; ++i){
00855             int tmp = (int)(i*percent);
00856             if(tmp < 0)
00857                 tmp = 0;
00858             segTbl[i] = tmp;
00859         }
00860     }
00861 
00862     if(brighten){ // same here
00863         if(channel == Red){ // and here ;-)
00864             for(int i=0; i < pixels; ++i){
00865                 int c = qRed(data[i]);
00866                 c = c + segTbl[c] > 255 ? 255 : c + segTbl[c];
00867                 data[i] = qRgba(c, qGreen(data[i]), qBlue(data[i]), qAlpha(data[i]));
00868             }
00869         }
00870         if(channel == Green){
00871             for(int i=0; i < pixels; ++i){
00872                 int c = qGreen(data[i]);
00873                 c = c + segTbl[c] > 255 ? 255 : c + segTbl[c];
00874                 data[i] = qRgba(qRed(data[i]), c, qBlue(data[i]), qAlpha(data[i]));
00875             }
00876         }
00877         else{
00878             for(int i=0; i < pixels; ++i){
00879                 int c = qBlue(data[i]);
00880                 c = c + segTbl[c] > 255 ? 255 : c + segTbl[c];
00881                 data[i] = qRgba(qRed(data[i]), qGreen(data[i]), c, qAlpha(data[i]));
00882             }
00883         }
00884 
00885     }
00886     else{
00887         if(channel == Red){
00888             for(int i=0; i < pixels; ++i){
00889                 int c = qRed(data[i]);
00890                 c = c - segTbl[c] < 0 ? 0 : c - segTbl[c];
00891                 data[i] = qRgba(c, qGreen(data[i]), qBlue(data[i]), qAlpha(data[i]));
00892             }
00893         }
00894         if(channel == Green){
00895             for(int i=0; i < pixels; ++i){
00896                 int c = qGreen(data[i]);
00897                 c = c - segTbl[c] < 0 ? 0 : c - segTbl[c];
00898                 data[i] = qRgba(qRed(data[i]), c, qBlue(data[i]), qAlpha(data[i]));
00899             }
00900         }
00901         else{
00902             for(int i=0; i < pixels; ++i){
00903                 int c = qBlue(data[i]);
00904                 c = c - segTbl[c] < 0 ? 0 : c - segTbl[c];
00905                 data[i] = qRgba(qRed(data[i]), qGreen(data[i]), c, qAlpha(data[i]));
00906             }
00907         }
00908     }
00909     delete [] segTbl;
00910 
00911     return image;
00912 }
00913 
00914 // Modulate an image with an RGB channel of another image
00915 //
00916 QImage& KImageEffect::modulate(QImage &image, QImage &modImage, bool reverse,
00917     ModulationType type, int factor, RGBComponent channel)
00918 {
00919     if (image.width() == 0 || image.height() == 0 ||
00920         modImage.width() == 0 || modImage.height() == 0) {
00921 #ifndef NDEBUG
00922       std::cerr << "WARNING: KImageEffect::modulate : invalid image\n";
00923 #endif
00924       return image;
00925     }
00926 
00927     int r, g, b, h, s, v, a;
00928     QColor clr;
00929     int mod=0;
00930     unsigned int x1, x2, y1, y2;
00931     register int x, y;
00932 
00933     // for image, we handle only depth 32
00934     if (image.depth()<32) image = image.convertDepth(32);
00935 
00936     // for modImage, we handle depth 8 and 32
00937     if (modImage.depth()<8) modImage = modImage.convertDepth(8);
00938 
00939     unsigned int *colorTable2 = (modImage.depth()==8) ?
00940                  modImage.colorTable():0;
00941     unsigned int *data1, *data2;
00942     unsigned char *data2b;
00943     unsigned int color1, color2;
00944 
00945     x1 = image.width();    y1 = image.height();
00946     x2 = modImage.width(); y2 = modImage.height();
00947 
00948     for (y = 0; y < (int)y1; y++) {
00949         data1 =  (unsigned int *) image.scanLine(y);
00950     data2 =  (unsigned int *) modImage.scanLine( y%y2 );
00951     data2b = (unsigned char *) modImage.scanLine( y%y2 );
00952 
00953     x=0;
00954     while(x < (int)x1) {
00955       color2 = (colorTable2) ? colorTable2[*data2b] : *data2;
00956       if (reverse) {
00957           color1 = color2;
00958           color2 = *data1;
00959       }
00960       else
00961           color1 = *data1;
00962 
00963       if (type == Intensity || type == Contrast) {
00964               r = qRed(color1);
00965           g = qGreen(color1);
00966           b = qBlue(color1);
00967           if (channel != All) {
00968                 mod = (channel == Red) ? qRed(color2) :
00969             (channel == Green) ? qGreen(color2) :
00970                 (channel == Blue) ? qBlue(color2) :
00971             (channel == Gray) ? qGray(color2) : 0;
00972             mod = mod*factor/50;
00973           }
00974 
00975           if (type == Intensity) {
00976             if (channel == All) {
00977               r += r * factor/50 * qRed(color2)/256;
00978               g += g * factor/50 * qGreen(color2)/256;
00979               b += b * factor/50 * qBlue(color2)/256;
00980             }
00981             else {
00982               r += r * mod/256;
00983               g += g * mod/256;
00984               b += b * mod/256;
00985             }
00986           }
00987           else { // Contrast
00988             if (channel == All) {
00989           r += (r-128) * factor/50 * qRed(color2)/128;
00990               g += (g-128) * factor/50 * qGreen(color2)/128;
00991               b += (b-128) * factor/50 * qBlue(color2)/128;
00992             }
00993             else {
00994               r += (r-128) * mod/128;
00995               g += (g-128) * mod/128;
00996               b += (b-128) * mod/128;
00997             }
00998           }
00999 
01000           if (r<0) r=0; if (r>255) r=255;
01001           if (g<0) g=0; if (g>255) g=255;
01002           if (b<0) b=0; if (b>255) b=255;
01003           a = qAlpha(*data1);
01004           *data1 = qRgba(r, g, b, a);
01005       }
01006       else if (type == Saturation || type == HueShift) {
01007           clr.setRgb(color1);
01008           clr.hsv(&h, &s, &v);
01009               mod = (channel == Red) ? qRed(color2) :
01010             (channel == Green) ? qGreen(color2) :
01011                 (channel == Blue) ? qBlue(color2) :
01012             (channel == Gray) ? qGray(color2) : 0;
01013           mod = mod*factor/50;
01014 
01015           if (type == Saturation) {
01016           s -= s * mod/256;
01017           if (s<0) s=0; if (s>255) s=255;
01018           }
01019           else { // HueShift
01020             h += mod;
01021         while(h<0) h+=360;
01022         h %= 360;
01023           }
01024 
01025           clr.setHsv(h, s, v);
01026           a = qAlpha(*data1);
01027           *data1 = clr.rgb() | ((uint)(a & 0xff) << 24);
01028       }
01029       data1++; data2++; data2b++; x++;
01030       if ( (x%x2) ==0) { data2 -= x2; data2b -= x2; }
01031         }
01032     }
01033     return image;
01034 }
01035 
01036 
01037 
01038 //======================================================================
01039 //
01040 // Blend effects
01041 //
01042 //======================================================================
01043 
01044 
01045 // Nice and fast direct pixel manipulation
01046 QImage& KImageEffect::blend(const QColor& clr, QImage& dst, float opacity)
01047 {
01048     if (dst.width() <= 0 || dst.height() <= 0)
01049         return dst;
01050 
01051     if (opacity < 0.0 || opacity > 1.0) {
01052 #ifndef NDEBUG
01053         std::cerr << "WARNING: KImageEffect::blend : invalid opacity. Range [0, 1]\n";
01054 #endif
01055         return dst;
01056     }
01057 
01058     int depth = dst.depth();
01059     if (depth != 32)
01060         dst = dst.convertDepth(32);
01061 
01062     int pixels = dst.width() * dst.height();
01063 
01064 #ifdef USE_SSE2_INLINE_ASM
01065     if ( KCPUInfo::haveExtension( KCPUInfo::IntelSSE2 ) && pixels > 16 ) {
01066         Q_UINT16 alpha = Q_UINT16( ( 1.0 - opacity ) * 256.0 );
01067 
01068         KIE8Pack packedalpha = { { alpha, alpha, alpha, 256,
01069                                       alpha, alpha, alpha, 256 } };
01070 
01071         Q_UINT16 red   = Q_UINT16( clr.red()   * 256 * opacity );
01072         Q_UINT16 green = Q_UINT16( clr.green() * 256 * opacity );
01073         Q_UINT16 blue  = Q_UINT16( clr.blue()  * 256 * opacity );
01074 
01075         KIE8Pack packedcolor = { { blue, green, red, 0,
01076                                       blue, green, red, 0 } };
01077 
01078         // Prepare the XMM5, XMM6 and XMM7 registers for unpacking and blending
01079         __asm__ __volatile__(
01080         "pxor        %%xmm7,  %%xmm7\n\t" // Zero out XMM7 for unpacking
01081         "movdqu        (%0),  %%xmm6\n\t" // Set up (1 - alpha) * 256 in XMM6
01082         "movdqu        (%1),  %%xmm5\n\t" // Set up color * alpha * 256 in XMM5
01083         : : "r"(&packedalpha), "r"(&packedcolor), "m"(packedcolor), "m"(packedalpha) );
01084 
01085         Q_UINT32 *data = reinterpret_cast<Q_UINT32*>( dst.bits() );
01086 
01087         // Check how many pixels we need to process to achieve 16 byte alignment
01088         int offset = (16 - (Q_UINT32( data ) & 0x0f)) / 4;
01089 
01090         // The main loop processes 8 pixels / iteration
01091         int remainder = (pixels - offset) % 8;
01092         pixels -= remainder;
01093 
01094         // Alignment loop
01095         for ( int i = 0; i < offset; i++ ) {
01096             __asm__ __volatile__(
01097             "movd         (%0,%1,4),      %%xmm0\n\t"  // Load one pixel to XMM1
01098             "punpcklbw       %%xmm7,      %%xmm0\n\t"  // Unpack the pixel
01099             "pmullw          %%xmm6,      %%xmm0\n\t"  // Multiply the pixel with (1 - alpha) * 256
01100             "paddw           %%xmm5,      %%xmm0\n\t"  // Add color * alpha * 256 to the result
01101             "psrlw               $8,      %%xmm0\n\t"  // Divide by 256
01102             "packuswb        %%xmm1,      %%xmm0\n\t"  // Pack the pixel to a dword
01103             "movd            %%xmm0,   (%0,%1,4)\n\t"  // Write the pixel to the image
01104             : : "r"(data), "r"(i) );
01105         }
01106 
01107         // Main loop
01108         for ( int i = offset; i < pixels; i += 8 ) {
01109             __asm__ __volatile(
01110             // Load 8 pixels to XMM registers 1 - 4
01111             "movq         (%0,%1,4),      %%xmm0\n\t"  // Load pixels 1 and 2 to XMM1
01112             "movq        8(%0,%1,4),      %%xmm1\n\t"  // Load pixels 3 and 4 to XMM2
01113             "movq       16(%0,%1,4),      %%xmm2\n\t"  // Load pixels 5 and 6 to XMM3
01114             "movq       24(%0,%1,4),      %%xmm3\n\t"  // Load pixels 7 and 8 to XMM4
01115 
01116             // Prefetch the pixels for next iteration
01117             "prefetchnta 32(%0,%1,4)            \n\t"
01118 
01119             // Blend pixels 1 and 2
01120             "punpcklbw       %%xmm7,      %%xmm0\n\t"  // Unpack the pixels
01121             "pmullw          %%xmm6,      %%xmm0\n\t"  // Multiply the pixels with (1 - alpha) * 256
01122             "paddw           %%xmm5,      %%xmm0\n\t"  // Add color * alpha * 256 to the result
01123             "psrlw               $8,      %%xmm0\n\t"  // Divide by 256
01124 
01125             // Blend pixels 3 and 4
01126             "punpcklbw       %%xmm7,      %%xmm1\n\t"  // Unpack the pixels
01127             "pmullw          %%xmm6,      %%xmm1\n\t"  // Multiply the pixels with (1 - alpha) * 256
01128             "paddw           %%xmm5,      %%xmm1\n\t"  // Add color * alpha * 256 to the result
01129             "psrlw               $8,      %%xmm1\n\t"  // Divide by 256
01130 
01131             // Blend pixels 5 and 6
01132             "punpcklbw       %%xmm7,      %%xmm2\n\t"  // Unpack the pixels
01133             "pmullw          %%xmm6,      %%xmm2\n\t"  // Multiply the pixels with (1 - alpha) * 256
01134             "paddw           %%xmm5,      %%xmm2\n\t"  // Add color * alpha * 256 to the result
01135             "psrlw               $8,      %%xmm2\n\t"  // Divide by 256
01136 
01137             // Blend pixels 7 and 8
01138             "punpcklbw       %%xmm7,      %%xmm3\n\t"  // Unpack the pixels
01139             "pmullw          %%xmm6,      %%xmm3\n\t"  // Multiply the pixels with (1 - alpha) * 256
01140             "paddw           %%xmm5,      %%xmm3\n\t"  // Add color * alpha * 256 to the result
01141             "psrlw               $8,      %%xmm3\n\t"  // Divide by 256
01142 
01143             // Pack the pixels into 2 double quadwords
01144             "packuswb        %%xmm1,      %%xmm0\n\t"  // Pack pixels 1 - 4 to a double qword
01145             "packuswb        %%xmm3,      %%xmm2\n\t"  // Pack pixles 5 - 8 to a double qword
01146 
01147             // Write the pixels back to the image
01148             "movdqa          %%xmm0,   (%0,%1,4)\n\t"  // Store pixels 1 - 4
01149             "movdqa          %%xmm2, 16(%0,%1,4)\n\t"  // Store pixels 5 - 8
01150             : : "r"(data), "r"(i) );
01151         }
01152 
01153         // Cleanup loop
01154         for ( int i = pixels; i < pixels + remainder; i++ ) {
01155             __asm__ __volatile__(
01156             "movd         (%0,%1,4),      %%xmm0\n\t"  // Load one pixel to XMM1
01157             "punpcklbw       %%xmm7,      %%xmm0\n\t"  // Unpack the pixel
01158             "pmullw          %%xmm6,      %%xmm0\n\t"  // Multiply the pixel with (1 - alpha) * 256
01159             "paddw           %%xmm5,      %%xmm0\n\t"  // Add color * alpha * 256 to the result
01160             "psrlw               $8,      %%xmm0\n\t"  // Divide by 256
01161             "packuswb        %%xmm1,      %%xmm0\n\t"  // Pack the pixel to a dword
01162             "movd            %%xmm0,   (%0,%1,4)\n\t"  // Write the pixel to the image
01163             : : "r"(data), "r"(i) );
01164         }
01165     } else
01166 #endif
01167 
01168 #ifdef USE_MMX_INLINE_ASM
01169     if ( KCPUInfo::haveExtension( KCPUInfo::IntelMMX ) && pixels > 1 ) {
01170         Q_UINT16 alpha = Q_UINT16( ( 1.0 - opacity ) * 256.0 );
01171         KIE4Pack packedalpha = { { alpha, alpha, alpha, 256 } };
01172 
01173         Q_UINT16 red   = Q_UINT16( clr.red()   * 256 * opacity );
01174         Q_UINT16 green = Q_UINT16( clr.green() * 256 * opacity );
01175         Q_UINT16 blue  = Q_UINT16( clr.blue()  * 256 * opacity );
01176 
01177         KIE4Pack packedcolor = { { blue, green, red, 0 } };
01178 
01179         __asm__ __volatile__(
01180         "pxor        %%mm7,    %%mm7\n\t"       // Zero out MM7 for unpacking
01181         "movq         (%0),    %%mm6\n\t"       // Set up (1 - alpha) * 256 in MM6
01182         "movq         (%1),    %%mm5\n\t"       // Set up color * alpha * 256 in MM5
01183         : : "r"(&packedalpha), "r"(&packedcolor), "m"(packedcolor), "m"(packedalpha) );
01184 
01185         Q_UINT32 *data = reinterpret_cast<Q_UINT32*>( dst.bits() );
01186 
01187         // The main loop processes 4 pixels / iteration
01188         int remainder = pixels % 4;
01189         pixels -= remainder;
01190 
01191         // Main loop
01192         for ( int i = 0; i < pixels; i += 4 ) {
01193             __asm__ __volatile__(
01194             // Load 4 pixels to MM registers 1 - 4
01195             "movd         (%0,%1,4),      %%mm0\n\t"  // Load the 1st pixel to MM0
01196             "movd        4(%0,%1,4),      %%mm1\n\t"  // Load the 2nd pixel to MM1
01197             "movd        8(%0,%1,4),      %%mm2\n\t"  // Load the 3rd pixel to MM2
01198             "movd       12(%0,%1,4),      %%mm3\n\t"  // Load the 4th pixel to MM3
01199 
01200             // Blend the first pixel
01201             "punpcklbw        %%mm7,      %%mm0\n\t"  // Unpack the pixel
01202             "pmullw           %%mm6,      %%mm0\n\t"  // Multiply the pixel with (1 - alpha) * 256
01203             "paddw            %%mm5,      %%mm0\n\t"  // Add color * alpha * 256 to the result
01204             "psrlw               $8,      %%mm0\n\t"  // Divide by 256
01205 
01206             // Blend the second pixel
01207             "punpcklbw        %%mm7,      %%mm1\n\t"  // Unpack the pixel
01208             "pmullw           %%mm6,      %%mm1\n\t"  // Multiply the pixel with (1 - alpha) * 256
01209             "paddw            %%mm5,      %%mm1\n\t"  // Add color * alpha * 256 to the result
01210             "psrlw               $8,      %%mm1\n\t"  // Divide by 256
01211 
01212             // Blend the third pixel
01213             "punpcklbw        %%mm7,      %%mm2\n\t"  // Unpack the pixel
01214             "pmullw           %%mm6,      %%mm2\n\t"  // Multiply the pixel with (1 - alpha) * 256
01215             "paddw            %%mm5,      %%mm2\n\t"  // Add color * alpha * 256 to the result
01216             "psrlw               $8,      %%mm2\n\t"  // Divide by 256
01217 
01218             // Blend the fourth pixel
01219             "punpcklbw        %%mm7,      %%mm3\n\t"  // Unpack the pixel
01220             "pmullw           %%mm6,      %%mm3\n\t"  // Multiply the pixel with (1 - alpha) * 256
01221             "paddw            %%mm5,      %%mm3\n\t"  // Add color * alpha * 256 to the result
01222             "psrlw               $8,      %%mm3\n\t"  // Divide by 256
01223 
01224             // Pack the pixels into 2 quadwords
01225             "packuswb         %%mm1,      %%mm0\n\t"  // Pack pixels 1 and 2 to a qword
01226             "packuswb         %%mm3,      %%mm2\n\t"  // Pack pixels 3 and 4 to a qword
01227 
01228             // Write the pixels back to the image
01229             "movq             %%mm0,  (%0,%1,4)\n\t"  // Store pixels 1 and 2
01230             "movq             %%mm2, 8(%0,%1,4)\n\t"  // Store pixels 3 and 4
01231             : : "r"(data), "r"(i) );
01232         }
01233 
01234         // Cleanup loop
01235         for ( int i = pixels; i < pixels + remainder; i++ ) {
01236             __asm__ __volatile__(
01237             "movd         (%0,%1,4),      %%mm0\n\t"  // Load one pixel to MM1
01238             "punpcklbw        %%mm7,      %%mm0\n\t"  // Unpack the pixel
01239             "pmullw           %%mm6,      %%mm0\n\t"  // Multiply the pixel with 1 - alpha * 256
01240             "paddw            %%mm5,      %%mm0\n\t"  // Add color * alpha * 256 to the result
01241             "psrlw               $8,      %%mm0\n\t"  // Divide by 256
01242             "packuswb         %%mm0,      %%mm0\n\t"  // Pack the pixel to a dword
01243             "movd             %%mm0,  (%0,%1,4)\n\t"  // Write the pixel to the image
01244             : : "r"(data), "r"(i) );
01245         }
01246 
01247         // Empty the MMX state
01248         __asm__ __volatile__("emms");
01249     } else
01250 #endif // USE_MMX_INLINE_ASM
01251 
01252     {
01253         int rcol, gcol, bcol;
01254         clr.rgb(&rcol, &gcol, &bcol);
01255 
01256 #ifdef WORDS_BIGENDIAN   // ARGB (skip alpha)
01257         register unsigned char *data = (unsigned char *)dst.bits() + 1;
01258 #else                    // BGRA
01259         register unsigned char *data = (unsigned char *)dst.bits();
01260 #endif
01261 
01262         for (register int i=0; i<pixels; i++)
01263         {
01264 #ifdef WORDS_BIGENDIAN
01265             *data += (unsigned char)((rcol - *data) * opacity);
01266             data++;
01267             *data += (unsigned char)((gcol - *data) * opacity);
01268             data++;
01269             *data += (unsigned char)((bcol - *data) * opacity);
01270             data++;
01271 #else
01272             *data += (unsigned char)((bcol - *data) * opacity);
01273             data++;
01274             *data += (unsigned char)((gcol - *data) * opacity);
01275             data++;
01276             *data += (unsigned char)((rcol - *data) * opacity);
01277             data++;
01278 #endif
01279             data++; // skip alpha
01280         }
01281     }
01282 
01283     return dst;
01284 }
01285 
01286 // Nice and fast direct pixel manipulation
01287 QImage& KImageEffect::blend(QImage& src, QImage& dst, float opacity)
01288 {
01289     if (src.width() <= 0 || src.height() <= 0)
01290         return dst;
01291     if (dst.width() <= 0 || dst.height() <= 0)
01292         return dst;
01293 
01294     if (src.width() != dst.width() || src.height() != dst.height()) {
01295 #ifndef NDEBUG
01296         std::cerr << "WARNING: KImageEffect::blend : src and destination images are not the same size\n";
01297 #endif
01298         return dst;
01299     }
01300 
01301     if (opacity < 0.0 || opacity > 1.0) {
01302 #ifndef NDEBUG
01303         std::cerr << "WARNING: KImageEffect::blend : invalid opacity. Range [0, 1]\n";
01304 #endif
01305         return dst;
01306     }
01307 
01308     if (src.depth() != 32) src = src.convertDepth(32);
01309     if (dst.depth() != 32) dst = dst.convertDepth(32);
01310 
01311     int pixels = src.width() * src.height();
01312 
01313 #ifdef USE_SSE2_INLINE_ASM
01314     if ( KCPUInfo::haveExtension( KCPUInfo::IntelSSE2 ) && pixels > 16 ) {
01315         Q_UINT16 alpha = Q_UINT16( opacity * 256.0 );
01316         KIE8Pack packedalpha = { { alpha, alpha, alpha, 0,
01317                                    alpha, alpha, alpha, 0 } };
01318 
01319         // Prepare the XMM6 and XMM7 registers for unpacking and blending
01320         __asm__ __volatile__(
01321         "pxor      %%xmm7, %%xmm7\n\t" // Zero out XMM7 for unpacking
01322         "movdqu      (%0), %%xmm6\n\t" // Set up alpha * 256 in XMM6
01323         : : "r"(&packedalpha), "m"(packedalpha) );
01324 
01325         Q_UINT32 *data1 = reinterpret_cast<Q_UINT32*>( src.bits() );
01326         Q_UINT32 *data2 = reinterpret_cast<Q_UINT32*>( dst.bits() );
01327 
01328         // Check how many pixels we need to process to achieve 16 byte alignment
01329         int offset = (16 - (Q_UINT32( data2 ) & 0x0f)) / 4;
01330 
01331         // The main loop processes 4 pixels / iteration
01332         int remainder = (pixels - offset) % 4;
01333         pixels -= remainder;
01334 
01335         // Alignment loop
01336         for ( int i = 0; i < offset; i++ ) {
01337             __asm__ __volatile__(
01338             "movd       (%1,%2,4),    %%xmm1\n\t"  // Load one dst pixel to XMM1
01339             "punpcklbw     %%xmm7,    %%xmm1\n\t"  // Unpack the pixel
01340             "movd       (%0,%2,4),    %%xmm0\n\t"  // Load one src pixel to XMM0
01341             "punpcklbw     %%xmm7,    %%xmm0\n\t"  // Unpack the pixel
01342             "psubw         %%xmm1,    %%xmm0\n\t"  // Subtract dst from src
01343             "pmullw        %%xmm6,    %%xmm0\n\t"  // Multiply the result with alpha * 256
01344             "psllw             $8,    %%xmm1\n\t"  // Multiply dst with 256
01345             "paddw         %%xmm1,    %%xmm0\n\t"  // Add dst to result
01346             "psrlw             $8,    %%xmm0\n\t"  // Divide by 256
01347             "packuswb      %%xmm1,    %%xmm0\n\t"  // Pack the pixel to a dword
01348             "movd          %%xmm0, (%1,%2,4)\n\t"  // Write the pixel to the image
01349             : : "r"(data1), "r"(data2), "r"(i) );
01350         }
01351 
01352         // Main loop
01353         for ( int i = offset; i < pixels; i += 4 ) {
01354             __asm__ __volatile__(
01355             // Load 4 src pixels to XMM0 and XMM2 and 4 dst pixels to XMM1 and XMM3
01356             "movq       (%0,%2,4),    %%xmm0\n\t"  // Load two src pixels to XMM0
01357             "movq       (%1,%2,4),    %%xmm1\n\t"  // Load two dst pixels to XMM1
01358             "movq      8(%0,%2,4),    %%xmm2\n\t"  // Load two src pixels to XMM2
01359             "movq      8(%1,%2,4),    %%xmm3\n\t"  // Load two dst pixels to XMM3
01360 
01361             // Prefetch the pixels for the iteration after the next one
01362             "prefetchnta 32(%0,%2,4)        \n\t"
01363             "prefetchnta 32(%1,%2,4)        \n\t"
01364 
01365             // Blend the first two pixels
01366             "punpcklbw     %%xmm7,    %%xmm1\n\t"  // Unpack the dst pixels
01367             "punpcklbw     %%xmm7,    %%xmm0\n\t"  // Unpack the src pixels
01368             "psubw         %%xmm1,    %%xmm0\n\t"  // Subtract dst from src
01369             "pmullw        %%xmm6,    %%xmm0\n\t"  // Multiply the result with alpha * 256
01370             "psllw             $8,    %%xmm1\n\t"  // Multiply dst with 256
01371             "paddw         %%xmm1,    %%xmm0\n\t"  // Add dst to the result
01372             "psrlw             $8,    %%xmm0\n\t"  // Divide by 256
01373 
01374             // Blend the next two pixels
01375             "punpcklbw     %%xmm7,    %%xmm3\n\t"  // Unpack the dst pixels
01376             "punpcklbw     %%xmm7,    %%xmm2\n\t"  // Unpack the src pixels
01377             "psubw         %%xmm3,    %%xmm2\n\t"  // Subtract dst from src
01378             "pmullw        %%xmm6,    %%xmm2\n\t"  // Multiply the result with alpha * 256
01379             "psllw             $8,    %%xmm3\n\t"  // Multiply dst with 256
01380             "paddw         %%xmm3,    %%xmm2\n\t"  // Add dst to the result
01381             "psrlw             $8,    %%xmm2\n\t"  // Divide by 256
01382 
01383             // Write the pixels back to the image
01384             "packuswb      %%xmm2,    %%xmm0\n\t"  // Pack the pixels to a double qword
01385             "movdqa        %%xmm0, (%1,%2,4)\n\t"  // Store the pixels
01386             : : "r"(data1), "r"(data2), "r"(i) );
01387         }
01388 
01389         // Cleanup loop
01390         for ( int i = pixels; i < pixels + remainder; i++ ) {
01391             __asm__ __volatile__(
01392             "movd       (%1,%2,4),    %%xmm1\n\t"  // Load one dst pixel to XMM1
01393             "punpcklbw     %%xmm7,    %%xmm1\n\t"  // Unpack the pixel
01394             "movd       (%0,%2,4),    %%xmm0\n\t"  // Load one src pixel to XMM0
01395             "punpcklbw     %%xmm7,    %%xmm0\n\t"  // Unpack the pixel
01396             "psubw         %%xmm1,    %%xmm0\n\t"  // Subtract dst from src
01397             "pmullw        %%xmm6,    %%xmm0\n\t"  // Multiply the result with alpha * 256
01398             "psllw             $8,    %%xmm1\n\t"  // Multiply dst with 256
01399             "paddw         %%xmm1,    %%xmm0\n\t"  // Add dst to result
01400             "psrlw             $8,    %%xmm0\n\t"  // Divide by 256
01401             "packuswb      %%xmm1,    %%xmm0\n\t"  // Pack the pixel to a dword
01402             "movd          %%xmm0, (%1,%2,4)\n\t"  // Write the pixel to the image
01403             : : "r"(data1), "r"(data2), "r"(i) );
01404         }
01405     } else
01406 #endif // USE_SSE2_INLINE_ASM
01407 
01408 #ifdef USE_MMX_INLINE_ASM
01409     if ( KCPUInfo::haveExtension( KCPUInfo::IntelMMX ) && pixels > 1 ) {
01410         Q_UINT16 alpha = Q_UINT16( opacity * 256.0 );
01411         KIE4Pack packedalpha = { { alpha, alpha, alpha, 0 } };
01412 
01413         // Prepare the MM6 and MM7 registers for blending and unpacking
01414         __asm__ __volatile__(
01415         "pxor       %%mm7,   %%mm7\n\t"      // Zero out MM7 for unpacking
01416         "movq        (%0),   %%mm6\n\t"      // Set up alpha * 256 in MM6
01417         : : "r"(&packedalpha), "m"(packedalpha) );
01418 
01419         Q_UINT32 *data1 = reinterpret_cast<Q_UINT32*>( src.bits() );
01420         Q_UINT32 *data2 = reinterpret_cast<Q_UINT32*>( dst.bits() );
01421 
01422         // The main loop processes 2 pixels / iteration
01423         int remainder = pixels % 2;
01424         pixels -= remainder;
01425 
01426         // Main loop
01427         for ( int i = 0; i < pixels; i += 2 ) {
01428             __asm__ __volatile__(
01429             // Load 2 src pixels to MM0 and MM2 and 2 dst pixels to MM1 and MM3
01430             "movd        (%0,%2,4),     %%mm0\n\t"  // Load the 1st src pixel to MM0
01431             "movd        (%1,%2,4),     %%mm1\n\t"  // Load the 1st dst pixel to MM1
01432             "movd       4(%0,%2,4),     %%mm2\n\t"  // Load the 2nd src pixel to MM2
01433             "movd       4(%1,%2,4),     %%mm3\n\t"  // Load the 2nd dst pixel to MM3
01434 
01435             // Blend the first pixel
01436             "punpcklbw       %%mm7,     %%mm0\n\t"  // Unpack the src pixel
01437             "punpcklbw       %%mm7,     %%mm1\n\t"  // Unpack the dst pixel
01438             "psubw           %%mm1,     %%mm0\n\t"  // Subtract dst from src
01439             "pmullw          %%mm6,     %%mm0\n\t"  // Multiply the result with alpha * 256
01440             "psllw              $8,     %%mm1\n\t"  // Multiply dst with 256
01441             "paddw           %%mm1,     %%mm0\n\t"  // Add dst to the result
01442             "psrlw              $8,     %%mm0\n\t"  // Divide by 256
01443 
01444             // Blend the second pixel
01445             "punpcklbw       %%mm7,     %%mm2\n\t"  // Unpack the src pixel
01446             "punpcklbw       %%mm7,     %%mm3\n\t"  // Unpack the dst pixel
01447             "psubw           %%mm3,     %%mm2\n\t"  // Subtract dst from src
01448