韩语topik考试报名网站:x264_cpu_detect

来源:百度文库 编辑:九乡新闻网 时间:2024/04/28 16:51:20

uint32_t x264_cpu_detect( void )

{

    uint32_t cpu = 0;//定义CPU型号

    uint32_t eax, ebx, ecx, edx;//扩展的寄存器

    uint32_t vendor[4] = {0};//共同表示制造厂商的名称字符串

    int max_extended_cap;//最大扩展功能索引值

    int cache;//高速缓存类型

 

#ifndef ARCH_X86_64// [U1] 

//此处表示非64位架构

    if( !x264_cpu_cpuid_test() )//见附录一,检测CPU是否能够执行cpuid来检测CPU信息

        return 0;//不能则返回,cpu值为0

#endif

 

x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );//见附录二,下同

//利用 CPUID 指令获得 CPU 数据,一般是以 EAX 当做索引值,所给定的 EAX 索引值不同,就

//可以获得不同的 CPU 数据。这些索引值可分为两大类:基本功能 ( standard function ) 与延伸//功能 ( extended function )

    if( eax == 0 )//EAX基本功能最大索引值,无则返回

        return 0;

 

    x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );//基本功能一,CPU等级、制程及某些 CPU 特性

    if( edx&0x00800000 )    //EDX的23位表示是否支持MMX指令集

        cpu |= X264_CPU_MMX;//8 #define X264_CPU_MMX            0x000008 (x264.h)

    else

        return 0; 

if( edx&0x02000000 )    //EDX的25位表示是否支持 SSE 指令集

        cpu |= X264_CPU_MMXEXT|X264_CPU_SSE;

 //16 #define X264_CPU_MMXEXT         0x000010  /* MMX2 aka MMXEXT aka ISSE */

//32 #define X264_CPU_SSE            0x000020

    if( edx&0x04000000 )  //EDX的26位表示是否支持SSE2指令集

        cpu |= X264_CPU_SSE2;//64 #define X264_CPU_SSE2           0x000040

    if( ecx&0x00000001 )    //ECX的0位表示是否支持SSE3指令集

        cpu |= X264_CPU_SSE3;//512 #define X264_CPU_SSE3           0x000200

    if( ecx&0x00000200 )    //ECX的9位表示是否支持SSSE3指令集

        cpu |= X264_CPU_SSSE3;//1024 #define X264_CPU_SSSE3          0x000400

    if( ecx&0x00080000 )    //ECX的19位表示是否支持SSE4指令集

        cpu |= X264_CPU_SSE4;//8192 #define X264_CPU_SSE4           0x002000  /* SSE4.1 */

    if( ecx&0x00100000 )    //ECX的20位表示是否支持SSE42指令集

        cpu |= X264_CPU_SSE42;// #define X264_CPU_SSE42          0x004000  /* SSE4.2 */

 

    if( cpu & X264_CPU_SSSE3 )

        cpu |= X264_CPU_SSE2_IS_FAST;

 //256 #define X264_CPU_SSE2_IS_FAST   0x000100  /* a few functions are only faster on Core2 and Phenom */

    if( cpu & X264_CPU_SSE4 )

        cpu |= X264_CPU_SHUFFLE_IS_FAST;

//2048 #define X264_CPU_SHUFFLE_IS_FAST 0x000800 /* Penryn, Nehalem, and Phenom have fast shuffle units */

x264_cpu_cpuid( 0x80000000, &eax, &ebx, &ecx, &edx );

//先使 EAX 设为 80000000H,再执行 CPUID,在返回时,EAX 就存有最大扩展功能的索引值。得到

//最大索引值后,EAX 就应该在这个最大值内去『呼叫』CPUID,如果你用比最大索引值还大的数值存于 //EAX,去执行 CPUID,结果和以最大索引值一样。

    max_extended_cap = eax;

//下面是处理赛扬AMD芯片的(略)

    if( !strcmp((char*)vendor, "AuthenticAMD") && max_extended_cap >= 0x80000001 )

    {

        x264_cpu_cpuid( 0x80000001, &eax, &ebx, &ecx, &edx );

        if( edx&0x00400000 )

            cpu |= X264_CPU_MMXEXT;

        if( cpu & X264_CPU_SSE2 )

        {

            if( ecx&0x00000040 ) /* SSE4a */

            {

                cpu |= X264_CPU_SSE2_IS_FAST;

                cpu |= X264_CPU_SSE_MISALIGN;

                cpu |= X264_CPU_LZCNT;

                cpu |= X264_CPU_SHUFFLE_IS_FAST;

                x264_cpu_mask_misalign_sse();

            }

            else

                cpu |= X264_CPU_SSE2_IS_SLOW;

        }

    }

//INTEL的CPU处理

    if( !strcmp((char*)vendor, "GenuineIntel") )//判断制造商名称字符串

    {

        int family, model, stepping;

        x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );

        family = ((eax>>8)&0xf) + ((eax>>20)&0xff);//cpu等级

        model  = ((eax>>4)&0xf) + ((eax>>12)&0xf0);//cpu核心

        stepping = eax&0xf;                        //cpu工艺制成

        /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")

         * theoretically support sse2, but it's significantly slower than mmx for

         * almost all of x264's functions, so let's just pretend they don't. */

        if( family==6 && (model==9 || model==13 || model==14) )

        {

            cpu &= ~(X264_CPU_SSE2|X264_CPU_SSE3);

            assert(!(cpu&(X264_CPU_SSSE3|X264_CPU_SSE4)));

        }

    }

//CACHE LINE是在主存和CACHE之间传送的最小单位,下面用来判断cacheline的大小

if( (!strcmp((char*)vendor, "GenuineIntel") || !strcmp((char*)vendor, "CyrixInstead")) && !(cpu&X264_CPU_SSE42))

    {

        /* cacheline size is specified in 3 places, any of which may be missing */

        x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );

        cache = (ebx&0xff00)>>5; // cflush size

//64EBX的-16位保留其他位致零后左移位

        if( !cache && max_extended_cap >= 0x80000006 )

        {

            x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );

            cache = ecx&0xff; // cacheline size

        }

        if( !cache )//cache值为零情况

        {

            // Cache and TLB Information

            static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };

            static const char cache64_ids[] = { 0x22, 0x23, 0x25, 0x29, 0x2c, 0x46, 0x47, 0x49, 0x60, 0x66, 0x67, 0x68, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7c, 0x7f, 0x86, 0x87, 0 };

            uint32_t buf[4];

            int max, i=0, j;

            do {

                x264_cpu_cpuid( 2, buf+0, buf+1, buf+2, buf+3 );

                max = buf[0]&0xff;

                buf[0] &= ~0xff;

                for(j=0; j<4; j++)

                    if( !(buf[j]>>31) )

                        while( buf[j] )

                        {

                            if( strchr( cache32_ids, buf[j]&0xff ) )

                                cache = 32;

                            if( strchr( cache64_ids, buf[j]&0xff ) )

                                cache = 64;

                            buf[j] >>= 8;

                        }

            } while( ++i < max );

        }

        if( cache == 32 )

            cpu |= X264_CPU_CACHELINE_32;

//1 #define X264_CPU_CACHELINE_32   0x000001  /* avoid memory loads that span the border between two cachelines */

 

        else if( cache == 64 )

            cpu |= X264_CPU_CACHELINE_64;

//2 #define X264_CPU_CACHELINE_64   0x000002  /* 32/64 is the size of a cacheline in bytes */

        else

            fprintf( stderr, "x264 [warning]: unable to determine cacheline size\n" );

    }

 

#ifdef BROKEN_STACK_ALIGNMENT //字节对齐方式

cpu |= X264_CPU_STACK_MOD4;

//4096 #define X264_CPU_STACK_MOD4     0x001000  /* if stack is only mod4 and not mod16 */

#endif

 

    return cpu;

}

附录一:x264_cpu_cpuid_test(cpu-a.asm)

;-----------------------------------------------------------------------------

; int x264_cpu_cpuid_test( void )

; return 0 if unsupported          ; checks whether the cpuid instruction can be used

; If bit 21 of EFLAGS (called ID) can be changed by software, the CPU can execute CPUID.
; The value left in EAX is (flags after the toggle attempt) XOR (flags before it).

;-----------------------------------------------------------------------------

cglobal x264_cpu_cpuid_test

    pushfd                          ; save the original EFLAGS (restored by the final popfd)

    push    ebx                     ; save EBX

    push    ebp                     ; save EBP

    push    esi                     ; save ESI

    push    edi                     ; save EDI

    pushfd                          ; push EFLAGS again so it can be read

    pop     eax                     ; EAX = current EFLAGS

    mov     ebx, eax                ; EBX = copy of the old flags

    xor     eax, 0x200000           ; flip bit 21 (0x200000 = 2097152) in the copy

    push    eax                     ; push the modified flags

    popfd                           ; try to load them into EFLAGS

    pushfd                          ; read EFLAGS back

    pop     eax                     ; EAX = EFLAGS after the attempted change

    xor     eax, ebx                ; EAX = new flags XOR old flags; zero means bit 21 did not change

    pop     edi                     ; restore the saved registers in reverse order

    pop     esi

    pop     ebp

    pop     ebx

    popfd                            ; restore the original EFLAGS

ret

附录二:x264_cpu_cpuid

;-----------------------------------------------------------------------------

; int x264_cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )

; Executes cpuid with EAX = op and stores the resulting EAX, EBX, ECX and EDX
; through the four pointer arguments.

;-----------------------------------------------------------------------------

cglobal x264_cpu_cpuid, 0,6

    mov     eax,    r0m                ; load argument 1 (op) into EAX

    cpuid                              ; the key instruction: results come back in EAX, EBX, ECX, EDX

    mov     esi,    r1m                ; load argument 2 (the eax output pointer) into ESI

    mov     [esi],  eax                ; store EAX to the memory ESI points to (same pattern below)

    mov     esi,    r2m                ; argument 3: ebx output pointer

    mov     [esi],  ebx

    mov     esi,    r3m                ; argument 4: ecx output pointer

    mov     [esi],  ecx

    mov     esi,    r4m                ; argument 5: edx output pointer

    mov     [esi],  edx              ; same as above

    RET