[mythtv] Re: denoise3d + Athlon XP bug

Jay Merrifield fracmak at gmail.com
Thu Apr 28 03:08:10 UTC 2005


Yes, everything still runs fine. :-) It seems that updating the
mm_support() function doesn't actually enable MMX support for the OSD;
it just allows the cpp file to compile. I've been poking around to see
where else we could swap i386 for MMX. Even though the blending
functions for osdsurface compile correctly when I take out the i386,
it segfaults when I try to watch videos, so that's a no-go. I changed
the i386 in the denoise function to MMX (after updating the
mm_support() function), which makes me wonder whether this code should
all be merged together. No problems with the denoise function's MMX
code. It actually fixes the original problem I was having at the
beginning of all this. (Whoopee!) I would say I'm seeing around a 3-5%
speed improvement on my end.

Jay

On 4/27/05, Robert Tsai <rtsai1111 at comcast.net> wrote:
> On Wed, Apr 27, 2005 at 09:49:56PM -0400, Jay Merrifield wrote:
> > I did a little research and figured out that the compile errors have
> > already been fixed in libavcodec which has the exact function used by
> > osdsurface.cpp. I ported over the function and hit compile and
> > everything compiles fine. I'm getting an odd compiler warning, but I
> > don't think it's that important. Here's the diff -u with the latest
> > cvs.
> 
> After everything compiles fine, does everything also run fine? :)
> 
> Do you see/feel a difference in your CPU utilization when you have an
> OSD on your screen?
> 
> > Index: osdsurface.cpp
> > ===================================================================
> > RCS file: /var/lib/mythcvs/mythtv/libs/libmythtv/osdsurface.cpp,v
> > retrieving revision 1.12
> > diff -u -r1.12 osdsurface.cpp
> > --- osdsurface.cpp      28 Apr 2005 01:29:59 -0000      1.12
> > +++ osdsurface.cpp      28 Apr 2005 01:43:59 -0000
> > @@ -9,16 +9,26 @@
> >  #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
> >  #define MM_SSE    0x0008 /* SSE functions */
> >  #define MM_SSE2   0x0010 /* PIV SSE2 functions */
> > +#define MM_3DNOWEXT  0x0020 /* AMD 3DNowExt */
> >
> >  #ifdef MMX
> >
> >  #include "mmx.h"
> >
> > +#ifdef ARCH_X86_64
> > +#  define REG_b "rbx"
> > +#  define REG_S "rsi"
> > +#else
> > +#  define REG_b "ebx"
> > +#  define REG_S "esi"
> > +#endif
> > +
> > +/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
> >  #define cpuid(index,eax,ebx,ecx,edx)\
> >      __asm __volatile\
> > -        ("movl %%ebx, %%esi\n\t"\
> > +        ("mov %%"REG_b", %%"REG_S"\n\t"\
> >           "cpuid\n\t"\
> > -         "xchgl %%ebx, %%esi"\
> > +         "xchg %%"REG_b", %%"REG_S\
> >           : "=a" (eax), "=S" (ebx),\
> >             "=c" (ecx), "=d" (edx)\
> >           : "0" (index));
> > @@ -28,84 +38,70 @@
> >  /* Function to test if multimedia instructions are supported...  */
> >  int mm_support(void)
> >  {
> > -    int rval;
> > +    int rval = 0;
> >      int eax, ebx, ecx, edx;
> > +    int max_std_level=0, max_ext_level, std_caps=0, ext_caps=0;
> > +    long a, c;
> >
> >      __asm__ __volatile__ (
> >                            /* See if CPUID instruction is supported ... */
> >                            /* ... Get copies of EFLAGS into eax and ecx */
> >                            "pushf\n\t"
> > -                          "popl %0\n\t"
> > -                          "movl %0, %1\n\t"
> > +                          "pop %0\n\t"
> > +                          "mov %0, %1\n\t"
> >
> >                            /* ... Toggle the ID bit in one copy and store */
> >                           /*     to the EFLAGS reg */
> > -                          "xorl $0x200000, %0\n\t"
> > +                          "xor $0x200000, %0\n\t"
> >                            "push %0\n\t"
> >                            "popf\n\t"
> > -
> >                            /* ... Get the (hopefully modified) EFLAGS */
> >                            "pushf\n\t"
> > -                          "popl %0\n\t"
> > -                          : "=a" (eax), "=c" (ecx)
> > +                          "pop %0\n\t"
> > +                          : "=a" (a), "=c" (c)
> >                            :
> >                            : "cc"
> >                            );
> >
> > -    if (eax == ecx)
> > -        return 0; /* CPUID not supported */
> > +    if (a == c)
> > +      return 0; /* CPUID not supported */
> >
> > -    cpuid(0, eax, ebx, ecx, edx);
> >
> > -    if (ebx == 0x756e6547 &&
> > -        edx == 0x49656e69 &&
> > -        ecx == 0x6c65746e) {
> > -
> > -        /* intel */
> > -    inteltest:
> > -        cpuid(1, eax, ebx, ecx, edx);
> > -        if ((edx & 0x00800000) == 0)
> > -            return 0;
> > -        rval = MM_MMX;
> > -        if (edx & 0x02000000)
> > -            rval |= MM_MMXEXT | MM_SSE;
> > -        if (edx & 0x04000000)
> > -            rval |= MM_SSE2;
> > -        return rval;
> > -    } else if (ebx == 0x68747541 &&
> > +    if(max_std_level >= 1){
> > +      cpuid(1, eax, ebx, ecx, std_caps);
> > +      if (std_caps & (1<<23))
> > +       rval |= MM_MMX;
> > +      if (std_caps & (1<<25))
> > +       rval |= MM_MMXEXT | MM_SSE;
> > +      if (std_caps & (1<<26))
> > +       rval |= MM_SSE2;
> > +    }
> > +
> > +    cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
> > +
> > +    if(max_ext_level >= 0x80000001){
> > +      cpuid(0x80000001, eax, ebx, ecx, ext_caps);
> > +      if (ext_caps & (1<<31))
> > +       rval |= MM_3DNOW;
> > +      if (ext_caps & (1<<30))
> > +       rval |= MM_3DNOWEXT;
> > +      if (ext_caps & (1<<23))
> > +       rval |= MM_MMX;
> > +    }
> > +
> > +    cpuid(0, eax, ebx, ecx, edx);
> > +    if (       ebx == 0x68747541 &&
> >                 edx == 0x69746e65 &&
> >                 ecx == 0x444d4163) {
> >          /* AMD */
> > -        cpuid(0x80000000, eax, ebx, ecx, edx);
> > -        if ((unsigned)eax < 0x80000001)
> > -            goto inteltest;
> > -        cpuid(0x80000001, eax, ebx, ecx, edx);
> > -        if ((edx & 0x00800000) == 0)
> > -            return 0;
> > -        rval = MM_MMX;
> > -        if (edx & 0x80000000)
> > -            rval |= MM_3DNOW;
> > -        if (edx & 0x00400000)
> > -            rval |= MM_MMXEXT;
> > -        return rval;
> > +      if(ext_caps & (1<<22))
> > +       rval |= MM_MMXEXT;
> >      } else if (ebx == 0x746e6543 &&
> >                 edx == 0x48727561 &&
> >                 ecx == 0x736c7561) {  /*  "CentaurHauls" */
> >          /* VIA C3 */
> > -        cpuid(0x80000000, eax, ebx, ecx, edx);
> > -        if ((unsigned)eax < 0x80000001)
> > -            goto inteltest;
> > -        cpuid(0x80000001, eax, ebx, ecx, edx);
> > -        rval = 0;
> > -        if ( edx & ( 1 << 31) )
> > -          rval |= MM_3DNOW;
> > -        if ( edx & ( 1 << 23) )
> > -          rval |= MM_MMX;
> > -        if ( edx & ( 1 << 24) )
> > -          rval |= MM_MMXEXT;
> > -        if (rval==0)
> > -            goto inteltest;
> > -        return rval;
> > +      if(ext_caps & (1<<24))
> > +       rval |= MM_MMXEXT;
> >      } else if (ebx == 0x69727943 &&
> >                 edx == 0x736e4978 &&
> >                 ecx == 0x64616574) {
> > @@ -117,31 +113,15 @@
> >             CPUID/0 to see if standard CPUID level 2 is supported.
> >             According to the table, the only CPU which supports level
> >             2 is also the only one which supports extended CPUID levels.
> > -        */
> > -        if (eax != 2)
> > -            goto inteltest;
> > -        cpuid(0x80000001, eax, ebx, ecx, edx);
> > -        if ((eax & 0x00800000) == 0)
> > -            return 0;
> > -        rval = MM_MMX;
> > -        if (eax & 0x01000000)
> > -            rval |= MM_MMXEXT;
> > -        return rval;
> > -    } else if (ebx == 0x756e6547 &&
> > -               edx == 0x54656e69 &&
> > -               ecx == 0x3638784d) {
> > -        /* Tranmeta Crusoe */
> > -        cpuid(0x80000000, eax, ebx, ecx, edx);
> > -        if ((unsigned)eax < 0x80000001)
> > -            return 0;
> > -        cpuid(0x80000001, eax, ebx, ecx, edx);
> > -        if ((edx & 0x00800000) == 0)
> > -            return 0;
> > -        return MM_MMX;
> > -    } else {
> > -        return 0;
> > -    }
> > +      */
> > +      if (eax < 2)
> > +       return rval;
> > +      if (ext_caps & (1<<24))
> > +       rval |= MM_MMXEXT;
> > +
> > +      return rval;
> >  }
> > +
> >  #endif
> >
> >  OSDSurface::OSDSurface(int w, int h)
> > _______________________________________________
> > mythtv-dev mailing list
> > mythtv-dev at mythtv.org
> > http://mythtv.org/cgi-bin/mailman/listinfo/mythtv-dev
> 
> --Rob
>


More information about the mythtv-dev mailing list