OpenVideo Documentation

   Main Page       Modules       Class Hierarchy       Alphabetical List       Compound List       File List       Compound Members       Related Pages   

ConverterYV12.cxx

Go to the documentation of this file.
00001 /* ========================================================================
00002  * Copyright (C) 2004-2006  Graz University of Technology
00003  *
00004  * This framework is free software; you can redistribute it and/or modify
00005  * it under the terms of the GNU General Public License as published by
00006  * the Free Software Foundation; either version 2 of the License, or
00007  * (at your option) any later version.
00008  *
00009  * This framework is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  *
00014  * You should have received a copy of the GNU General Public License
00015  * along with this framework; if not, write to the Free Software
00016  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00017  *
00018  * For further information please contact Dieter Schmalstieg under
00019  * <schmalstieg@icg.tu-graz.ac.at> or write to Dieter Schmalstieg,
00020  * Graz University of Technology, Institut für Maschinelles Sehen und Darstellen,
00021  * Inffeldgasse 16a, 8010 Graz, Austria.
00022  * ========================================================================
00023  * PROJECT: OpenVideo
00024  * ======================================================================== */
00029 /* ======================================================================= */
00030 
00031 //
00032 // Optimized Converter from YV12 to RGB565 and Luminance
00033 // Written from scratch by Daniel Wagner
00034 // For questions send a mail to: daniel@icg.tu-graz.ac.at
00035 //
00036 // Modified by Bernhard Reitinger in order to convert YV12 to RGBA
00037 //
00038 // Highly optimized C++ version. Uses look-up
00039 // tables for almost everything; thereby requires
00040 // no multiplication or if operations for doing
00041 // format conversion including saturation checks.
00042 //
00043 // Memory usage: allocates 3249 bytes for lookup tables.
00044 //
00045 // Should run pretty fast on any device. Intel IPP
00046 // probably includes a much faster version using Wireless MMX
00047 // (would only work on Intel XScale processors).
00048 //
00049 //
00050 
00051 #include <openvideo/ConverterYV12.h>
00052 #include <memory.h>
00053 
00054 
00055 namespace openvideo {
00056 
00057 
// Packs three colour components into one 32-bit pixel laid out as 0x00RRGGBB.
// Only the low 8 bits of each component are used; the top byte is left at zero.
// Every argument is parenthesized so that compound expressions (e.g. `a | b`)
// are masked as a whole instead of being split by operator precedence.
#define RGB888_to_RGB32(r, g, b)        ( (unsigned int)( (((unsigned int)(r) & 0xffu) << 16) | (((unsigned int)(g) & 0xffu) << 8) | (((unsigned int)(b) & 0xffu) << 0) ) )
00059 
00060 
00061 void
00062 ConverterYV12::init()
00063 {
00064     int i;
00065 
00066     // initialize lookup table for capping values to 0..255
00067     // works for values [LUTCAP_MIN..LUTCAP_MAX]
00068     //
00069     lutCap0 = new unsigned char[LUTCAP_MAX-LUTCAP_MIN+1];
00070     lutCap = lutCap0 - LUTCAP_MIN;
00071 
00072     for(i=LUTCAP_MIN; i<=LUTCAP_MAX; i++)
00073         lutCap[i] = i>0 ? (i<256 ? i : 255) : 0;
00074 
00075     // initialize lookup table for multiplications
00076     //
00077     lutV_for_Red = new short[CHANNEL_RANGE];
00078     for(i=0; i<CHANNEL_RANGE; i++)
00079         lutV_for_Red[i] = static_cast<short>(1.596f*(i-128));
00080 
00081     lutU_for_Blue = new short[CHANNEL_RANGE];
00082     for(i=0; i<CHANNEL_RANGE; i++)
00083         lutU_for_Blue[i] = static_cast<short>(2.018f*(i-128));
00084 
00085     lutV_for_Green = new short[CHANNEL_RANGE];
00086     for(i=0; i<CHANNEL_RANGE; i++)
00087         lutV_for_Green[i] = static_cast<short>(-0.813f*(i-128));
00088 
00089     lutU_for_Green = new short[CHANNEL_RANGE];
00090     for(i=0; i<CHANNEL_RANGE; i++)
00091         lutU_for_Green[i] = static_cast<short>(-0.391f*(i-128));
00092 
00093     lutY = new short[CHANNEL_RANGE];
00094     for(i=0; i<CHANNEL_RANGE; i++)
00095         lutY[i] = static_cast<short>(1.164f*(i-16));
00096 }   
00097 
00098 
00099 void
00100 ConverterYV12::deinit()
00101 {
00102     delete lutCap0;
00103     delete lutV_for_Red;
00104     delete lutU_for_Blue;
00105     delete lutV_for_Green;
00106     delete lutU_for_Green;
00107     delete lutY;
00108 }
00109 
00110 
00111 void
00112 ConverterYV12::convertToRGB32(const unsigned char* nSrcYUV, int nWidth, int nHeight, unsigned int* nDstRGB32, bool nSwizzle34, int nCropX, int nCropY)
00113 {
00114     //
00115     // Each U and V value can be used for 4 pixels: (x,y) (x+1,y) (x,y+1) (x+1,y+1)
00116     // To make best use of that, we therefore do two lines at once.
00117     // YV12 has one strange specialty: For each 4 pixels in a row, the pixels 3 and 4 are
00118     // flipped. We therefore hard-code this special behavior by doing 4 pixels per line
00119     // before looping. This sums up to 8 pixels per inner loop and a quite long inner loop!
00120     //
00121     // YV12 stores YUV pixel data in the following format:
00122     // 1st block: Y at full res (8-bit)
00123     // 2nd block: V at half res (8-bit)
00124     // 3rd block: U at half res (8-bit)
00125     //
00126     // Basic formula to convert YUV to RGB:
00127     //   R = 1.164*(i-16) + 1.596*(V - 128);
00128     //   B = 1.164*(i-16) +                   2.018*(U - 128);
00129     //   G = 1.164*(i-16) - 0.813*(V - 128) - 0.391*(U - 128);
00130     //
00131     // Each pixel is first converted to R, G, B (each 8-bits) and then
00132     // merged into a 16-bits RGB565 pixel. This routine could therefore
00133     // easily be modified to support any other RGB-based pixel format.
00134     //
00135     // The final picture will miss nCropX left *and* right. Width is therefore
00136     // reduced by 2*nCropX Pixels. Take care that the resulting image must
00137     // have a width which is a multiple of 4
00138     //
00139 
00140     const int V_OFFS = nWidth*nHeight;
00141     const int U_OFFS = V_OFFS*5/4;
00142     const int croppedWidth = nWidth-2*nCropX;
00143     const int croppedHeight = nHeight-2*nCropY;
00144     const int extendedWidth = nWidth+2*nCropX;
00145     const int cropOffsY = nCropX + (nCropY/2)*nWidth;
00146     const int cropOffsUV = nCropX/2 + (nCropY/4)*nWidth/2;
00147 
00148     const unsigned char* srcU = nSrcYUV + U_OFFS + cropOffsUV;  // source pointer for U
00149     const unsigned char* srcV = nSrcYUV + V_OFFS + cropOffsUV;  // source pointer for V
00150     const unsigned char* srcY0 = nSrcYUV + cropOffsY;           // source pointer for even Y lines
00151     const unsigned char* srcY1 = nSrcYUV + cropOffsY + nWidth;  // source pointer for odd Y lines
00152 
00153     unsigned int* dst0 = nDstRGB32;                         // destination pointer for even lines
00154     unsigned int* dst1 = nDstRGB32+croppedWidth;                // destination pointer for odd lines
00155 
00156     if(nSwizzle34)
00157     {
00158         int yl = croppedHeight/2 + 1;
00159         while(--yl)                         // pre-decrement with test for !=0 is faster than post-increment on ARM processors!
00160         {
00161             int xl=croppedWidth/4 + 1;      // pre-decrement with test for !=0 is faster than post-increment on ARM processors!
00162             while(--xl)
00163             {
00164                 // each run does 8 pixels: 4 pixels in two lines
00165                 //
00166 
00167                 int R,G,B, R0,G0,B0;
00168                 int U,V,Y, Y0;
00169 
00170                 // U and V can be used for 4 pixels
00171                 //
00172                 U = *srcU++;
00173                 V = *srcV++;
00174 
00175                 R0 = getV_for_Red(V);
00176                 B0 = getU_for_Blue(U);
00177                 G0 = getV_for_Green(V) + getU_for_Green(U);
00178 
00179 
00180                 Y = srcY0[0];
00181                 Y0 = getY(Y);
00182 
00183                 R = cap(Y0 + R0);
00184                 G = cap(Y0 + G0);
00185                 B = cap(Y0 + B0);
00186 
00187                 dst0[0] = RGB888_to_RGB32(R,G,B);
00188 
00189 
00190                 Y = srcY0[1];
00191                 Y0 = getY(Y);
00192 
00193                 R = cap(Y0 + R0);
00194                 G = cap(Y0 + G0);
00195                 B = cap(Y0 + B0);
00196 
00197                 dst0[1] = RGB888_to_RGB32(R,G,B);
00198 
00199 
00200                 Y = srcY1[0];
00201                 Y0 = getY(Y);
00202 
00203                 R = cap(Y0 + R0);
00204                 G = cap(Y0 + G0);
00205                 B = cap(Y0 + B0);
00206 
00207                 dst1[0] = RGB888_to_RGB32(R,G,B);
00208 
00209 
00210                 Y = srcY1[1];
00211                 Y0 = getY(Y);
00212 
00213                 R = cap(Y0 + R0);
00214                 G = cap(Y0 + G0);
00215                 B = cap(Y0 + B0);
00216 
00217                 dst1[1] = RGB888_to_RGB32(R,G,B);
00218 
00219 
00220                 // U and V can be used for 4 pixels
00221                 //
00222                 U = *srcU++;
00223                 V = *srcV++;
00224 
00225                 R0 = getV_for_Red(V);
00226                 B0 = getU_for_Blue(U);
00227                 G0 = getV_for_Green(V) + getU_for_Green(U);
00228 
00229 
00230                 Y = srcY0[3];
00231                 Y0 = getY(Y);
00232 
00233                 R = cap(Y0 + R0);
00234                 G = cap(Y0 + G0);
00235                 B = cap(Y0 + B0);
00236 
00237                 dst0[2] = RGB888_to_RGB32(R,G,B);
00238 
00239 
00240                 Y = srcY0[2];
00241                 Y0 = getY(Y);
00242 
00243                 R = cap(Y0 + R0);
00244                 G = cap(Y0 + G0);
00245                 B = cap(Y0 + B0);
00246 
00247                 dst0[3] = RGB888_to_RGB32(R,G,B);
00248 
00249 
00250                 Y = srcY1[3];
00251                 Y0 = getY(Y);
00252 
00253                 R = cap(Y0 + R0);
00254                 G = cap(Y0 + G0);
00255                 B = cap(Y0 + B0);
00256 
00257                 dst1[2] = RGB888_to_RGB32(R,G,B);
00258 
00259 
00260                 Y = srcY1[2];
00261                 Y0 = getY(Y);
00262 
00263                 R = cap(Y0 + R0);
00264                 G = cap(Y0 + G0);
00265                 B = cap(Y0 + B0);
00266 
00267                 dst1[3] = RGB888_to_RGB32(R,G,B);
00268 
00269 
00270                 dst0 += 4;
00271                 dst1 += 4;
00272                 srcY0 += 4;
00273                 srcY1 += 4;
00274             }
00275 
00276             dst0 += croppedWidth;
00277             dst1 += croppedWidth;
00278             srcU += nCropX;
00279             srcV += nCropX;
00280             srcY0 += extendedWidth;
00281             srcY1 += extendedWidth;
00282         }
00283     }
00284     else
00285     {
00286         int yl = croppedHeight/2 + 1;
00287         while(--yl)                         // pre-decrement with test for !=0 is faster than post-increment on ARM processors!
00288         {
00289             int xl=croppedWidth/4 + 1;      // pre-decrement with test for !=0 is faster than post-increment on ARM processors!
00290             while(--xl)
00291             {
00292                 // each run does 8 pixels: 4 pixels in two lines
00293                 //
00294 
00295                 int R,G,B, R0,G0,B0;
00296                 int U,V,Y, Y0;
00297 
00298                 // U and V can be used for 4 pixels
00299                 //
00300                 U = *srcU++;
00301                 V = *srcV++;
00302 
00303                 R0 = getV_for_Red(V);
00304                 B0 = getU_for_Blue(U);
00305                 G0 = getV_for_Green(V) + getU_for_Green(U);
00306 
00307 
00308                 Y = srcY0[0];
00309                 Y0 = getY(Y);
00310 
00311                 R = cap(Y0 + R0);
00312                 G = cap(Y0 + G0);
00313                 B = cap(Y0 + B0);
00314 
00315                 dst0[0] = RGB888_to_RGB32(R,G,B);
00316 
00317 
00318                 Y = srcY0[1];
00319                 Y0 = getY(Y);
00320 
00321                 R = cap(Y0 + R0);
00322                 G = cap(Y0 + G0);
00323                 B = cap(Y0 + B0);
00324 
00325                 dst0[1] = RGB888_to_RGB32(R,G,B);
00326 
00327 
00328                 Y = srcY1[0];
00329                 Y0 = getY(Y);
00330 
00331                 R = cap(Y0 + R0);
00332                 G = cap(Y0 + G0);
00333                 B = cap(Y0 + B0);
00334 
00335                 dst1[0] = RGB888_to_RGB32(R,G,B);
00336 
00337 
00338                 Y = srcY1[1];
00339                 Y0 = getY(Y);
00340 
00341                 R = cap(Y0 + R0);
00342                 G = cap(Y0 + G0);
00343                 B = cap(Y0 + B0);
00344 
00345                 dst1[1] = RGB888_to_RGB32(R,G,B);
00346 
00347 
00348                 // U and V can be used for 4 pixels
00349                 //
00350                 U = *srcU++;
00351                 V = *srcV++;
00352 
00353                 R0 = getV_for_Red(V);
00354                 B0 = getU_for_Blue(U);
00355                 G0 = getV_for_Green(V) + getU_for_Green(U);
00356 
00357 
00358                 Y = srcY0[2];
00359                 Y0 = getY(Y);
00360 
00361                 R = cap(Y0 + R0);
00362                 G = cap(Y0 + G0);
00363                 B = cap(Y0 + B0);
00364 
00365                 dst0[2] = RGB888_to_RGB32(R,G,B);
00366 
00367 
00368                 Y = srcY0[3];
00369                 Y0 = getY(Y);
00370 
00371                 R = cap(Y0 + R0);
00372                 G = cap(Y0 + G0);
00373                 B = cap(Y0 + B0);
00374 
00375                 dst0[3] = RGB888_to_RGB32(R,G,B);
00376 
00377 
00378                 Y = srcY1[2];
00379                 Y0 = getY(Y);
00380 
00381                 R = cap(Y0 + R0);
00382                 G = cap(Y0 + G0);
00383                 B = cap(Y0 + B0);
00384 
00385                 dst1[2] = RGB888_to_RGB32(R,G,B);
00386 
00387 
00388                 Y = srcY1[3];
00389                 Y0 = getY(Y);
00390 
00391                 R = cap(Y0 + R0);
00392                 G = cap(Y0 + G0);
00393                 B = cap(Y0 + B0);
00394 
00395                 dst1[3] = RGB888_to_RGB32(R,G,B);
00396 
00397 
00398                 dst0 += 4;
00399                 dst1 += 4;
00400                 srcY0 += 4;
00401                 srcY1 += 4;
00402             }
00403 
00404             dst0 += croppedWidth;
00405             dst1 += croppedWidth;
00406             srcU += nCropX;
00407             srcV += nCropX;
00408             srcY0 += extendedWidth;
00409             srcY1 += extendedWidth;
00410         }
00411     }
00412 }
00413 
00414 
00415 void
00416 ConverterYV12::convertToLum(const unsigned char* nSrcYUV, int nWidth, int nHeight, unsigned char* nDstLum, bool nSwizzle34, int nCropX, int nCropY)
00417 {
00418     // Converts from YV12 to Luminance (8-bit gray). Luminance is stored in full
00419     // resolution as the first block in the YV12 image. Unfortunately, for every 
00420     // four pixels we have to switch pixels 3 and 4.
00421     //
00422     // The final picture will miss nCropX left *and* right. Width is therefore
00423     // reduced by 2*nCropX Pixels. Take care that the resulting image must
00424     // have a width which is a multiple of 16. nCropX must be an even number.
00425     //
00426 
00427     const unsigned int* src = reinterpret_cast<const unsigned int*>(nSrcYUV+nCropX+(nCropY/2)*nWidth);      // source pointer
00428     unsigned int* dst = reinterpret_cast<unsigned int*>(nDstLum);                                           // destination pointer
00429     const int numRuns = (nWidth-2*nCropX)/16 + 1;
00430 
00431     int y = nHeight-2*nCropY+1;
00432     while(--y)
00433     {
00434         // the algorithm's inner loop uses 7 variables. so everything
00435         // should fit into registers on ARM processors...
00436         //
00437         int x = numRuns;                                                // +1 for pre-decriment (faster on ARM processors)
00438         int v0,v1,v2,v3;
00439 
00440         if(nSwizzle34)
00441         {
00442             while(--x)  
00443             {
00444                 // read 16 pixels
00445                 //
00446                 v0 = *src++;
00447                 v1 = *src++;
00448                 v2 = *src++;
00449                 v3 = *src++;
00450 
00451                 // write 16 pixels. do byte swizzling to exchange pixels 3 and 4 of each quadruple
00452                 //
00453                 *dst++ = (v0&0x0000ffff) | ((v0>>8)&0x00ff0000) | ((v0<<8)&0xff000000);
00454                 *dst++ = (v1&0x0000ffff) | ((v1>>8)&0x00ff0000) | ((v1<<8)&0xff000000);
00455                 *dst++ = (v2&0x0000ffff) | ((v2>>8)&0x00ff0000) | ((v2<<8)&0xff000000);
00456                 *dst++ = (v3&0x0000ffff) | ((v3>>8)&0x00ff0000) | ((v3<<8)&0xff000000);
00457             }
00458         }
00459         else
00460         {
00461             x--;
00462             memcpy(dst,src, x*16);
00463             src += x*4;
00464             dst += x*4;
00465         }
00466 
00467         src += nCropX/2;
00468     }
00469 }
00470 
00471 
00472 }  // namespace openvideo
00473 
00474 //========================================================================
00475 // End of $FILENAME$
00476 //========================================================================
00477 // Local Variables:
00478 // mode: c++
00479 // c-basic-offset: 4
00480 // eval: (c-set-offset 'substatement-open 0)
00481 // eval: (c-set-offset 'case-label '+)
00482 // eval: (c-set-offset 'statement 'c-lineup-runin-statements)
00483 // eval: (setq indent-tabs-mode nil)
00484 // End:
00485 //========================================================================
 This page was generated at Wed May 31 13:04:16 2006 for OpenVideo by Doxygen.
 If you have any comments, please send a message to schmalstieg@icg.tu-graz.ac.at.
www.studierstube.org