x264_cpu_detect 函数源码注释
来源:百度文库 编辑:九乡新闻网 时间:2024/04/28 16:51:20
/**
 * Probe the x86 CPU with CPUID and report which features x264 may use.
 *
 * Returns a bitmask of X264_CPU_* flags. Returns 0 when CPUID is not
 * available (32-bit builds only), when CPUID reports no basic leaves, or
 * when the CPU lacks MMX.
 *
 * Side effects: on AMD CPUs with SSE4a this calls
 * x264_cpu_mask_misalign_sse(); if the cacheline size cannot be determined
 * on an Intel/Cyrix CPU a warning is written to stderr.
 */
uint32_t x264_cpu_detect( void )
{
    uint32_t cpu = 0;                 /* accumulated X264_CPU_* flags */
    uint32_t eax, ebx, ecx, edx;      /* CPUID output registers */
    uint32_t vendor[4] = {0};         /* 12-byte vendor string; vendor[3] stays 0 as terminator */
    const char *vendor_name = (const char *)vendor;
    /* Highest supported extended leaf. Kept unsigned because the values are
     * >= 0x80000000 and would not fit in a signed int. */
    uint32_t max_extended_cap;

#ifndef ARCH_X86_64
    /* Non-64-bit build: first make sure the CPUID instruction can be executed at all. */
    if( !x264_cpu_cpuid_test() )
        return 0;
#endif

    /* Leaf 0: EAX = highest basic leaf; EBX, EDX, ECX (in that order) spell
     * the vendor string, hence the vendor+0 / vendor+2 / vendor+1 targets. */
    x264_cpu_cpuid( 0, &eax, vendor+0, vendor+2, vendor+1 );
    if( eax == 0 )
        return 0;

    /* Leaf 1: feature flags in EDX and ECX. */
    x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
    if( !(edx&0x00800000) )           /* EDX bit 23: MMX is mandatory */
        return 0;
    cpu |= X264_CPU_MMX;
    if( edx&0x02000000 )              /* EDX bit 25: SSE (implies MMXEXT) */
        cpu |= X264_CPU_MMXEXT|X264_CPU_SSE;
    if( edx&0x04000000 )              /* EDX bit 26: SSE2 */
        cpu |= X264_CPU_SSE2;
    if( ecx&0x00000001 )              /* ECX bit 0: SSE3 */
        cpu |= X264_CPU_SSE3;
    if( ecx&0x00000200 )              /* ECX bit 9: SSSE3 */
        cpu |= X264_CPU_SSSE3;
    if( ecx&0x00080000 )              /* ECX bit 19: SSE4.1 */
        cpu |= X264_CPU_SSE4;
    if( ecx&0x00100000 )              /* ECX bit 20: SSE4.2 */
        cpu |= X264_CPU_SSE42;

    /* Derived performance hints. */
    if( cpu & X264_CPU_SSSE3 )
        cpu |= X264_CPU_SSE2_IS_FAST;
    if( cpu & X264_CPU_SSE4 )
        cpu |= X264_CPU_SHUFFLE_IS_FAST;  /* Penryn, Nehalem, and Phenom have fast shuffle units */

    /* Leaf 0x80000000: EAX = highest supported extended leaf. */
    x264_cpu_cpuid( 0x80000000, &eax, &ebx, &ecx, &edx );
    max_extended_cap = eax;

    /* AMD-specific features. */
    if( !strcmp( vendor_name, "AuthenticAMD" ) && max_extended_cap >= 0x80000001 )
    {
        x264_cpu_cpuid( 0x80000001, &eax, &ebx, &ecx, &edx );
        if( edx&0x00400000 )
            cpu |= X264_CPU_MMXEXT;
        if( cpu & X264_CPU_SSE2 )
        {
            if( ecx&0x00000040 ) /* SSE4a */
            {
                cpu |= X264_CPU_SSE2_IS_FAST;
                cpu |= X264_CPU_SSE_MISALIGN;
                cpu |= X264_CPU_LZCNT;
                cpu |= X264_CPU_SHUFFLE_IS_FAST;
                x264_cpu_mask_misalign_sse();
            }
            else
                cpu |= X264_CPU_SSE2_IS_SLOW;
        }
    }

    /* Intel-specific quirks. */
    if( !strcmp( vendor_name, "GenuineIntel" ) )
    {
        int family, model;
        x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
        /* Base family/model combined with the extended family/model fields. */
        family = ((eax>>8)&0xf) + ((eax>>20)&0xff);
        model  = ((eax>>4)&0xf) + ((eax>>12)&0xf0);
        /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and 6/14 (core1 "yonah")
         * theoretically support sse2, but it's significantly slower than mmx for
         * almost all of x264's functions, so let's just pretend they don't. */
        if( family==6 && (model==9 || model==13 || model==14) )
        {
            cpu &= ~(X264_CPU_SSE2|X264_CPU_SSE3);
            assert(!(cpu&(X264_CPU_SSSE3|X264_CPU_SSE4)));
        }
    }

    /* Cacheline size (the unit transferred between memory and cache);
     * only probed on Intel/Cyrix CPUs without SSE4.2. */
    if( (!strcmp( vendor_name, "GenuineIntel" ) || !strcmp( vendor_name, "CyrixInstead" )) && !(cpu&X264_CPU_SSE42) )
    {
        int cache;

        /* cacheline size is specified in 3 places, any of which may be missing */
        x264_cpu_cpuid( 1, &eax, &ebx, &ecx, &edx );
        /* EBX bits 15:8 hold the CLFLUSH line size in 8-byte units:
         * (ebx&0xff00)>>5 == ((ebx>>8)&0xff)*8, i.e. the size in bytes. */
        cache = (ebx&0xff00)>>5;
        if( !cache && max_extended_cap >= 0x80000006 )
        {
            x264_cpu_cpuid( 0x80000006, &eax, &ebx, &ecx, &edx );
            cache = ecx&0xff; // cacheline size
        }
        if( !cache )
        {
            // Cache and TLB Information (CPUID leaf 2 descriptor bytes)
            static const char cache32_ids[] = { 0x0a, 0x0c, 0x41, 0x42, 0x43, 0x44, 0x45, 0x82, 0x83, 0x84, 0x85, 0 };
            /* NOTE(review): 0x7c is listed twice; harmless, but possibly a typo for another descriptor - confirm. */
            static const char cache64_ids[] = { 0x22, 0x23, 0x25, 0x29, 0x2c, 0x46, 0x47, 0x49, 0x60, 0x66, 0x67, 0x68, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7c, 0x7f, 0x86, 0x87, 0 };
            uint32_t buf[4];
            int max, i=0, j;
            do {
                x264_cpu_cpuid( 2, buf+0, buf+1, buf+2, buf+3 );
                max = buf[0]&0xff;    /* low byte of EAX: how many times leaf 2 must be queried */
                buf[0] &= ~0xff;      /* that byte is a count, not a descriptor */
                for(j=0; j<4; j++)
                    if( !(buf[j]>>31) )   /* bit 31 set: register holds no valid descriptors */
                        while( buf[j] )
                        {
                            int id = buf[j]&0xff;
                            /* strchr() also matches the terminating NUL of the id
                             * tables, so a null descriptor byte must be skipped or
                             * it would be misread as a 64-byte cacheline. */
                            if( id )
                            {
                                if( strchr( cache32_ids, id ) )
                                    cache = 32;
                                if( strchr( cache64_ids, id ) )
                                    cache = 64;
                            }
                            buf[j] >>= 8;
                        }
            } while( ++i < max );
        }

        if( cache == 32 )
            cpu |= X264_CPU_CACHELINE_32;
        else if( cache == 64 )
            cpu |= X264_CPU_CACHELINE_64;
        else
            fprintf( stderr, "x264 [warning]: unable to determine cacheline size\n" );
    }

#ifdef BROKEN_STACK_ALIGNMENT
    /* Build was configured with a stack that is not reliably aligned. */
    cpu |= X264_CPU_STACK_MOD4;
#endif

    return cpu;
}
附录一:x264_cpu_cpuid_test(cpu-a.asm)
;-----------------------------------------------------------------------------
; int x264_cpu_cpuid_test( void )
; return 0 if unsupported
; Checks whether the CPUID instruction can be used: if bit 21 of EFLAGS
; (the ID flag) can be changed by software, the CPU can execute CPUID.
;-----------------------------------------------------------------------------
cglobal x264_cpu_cpuid_test
pushfd ; save the caller's EFLAGS (restored by the final popfd)
push ebx ; save EBX
push ebp ; save EBP
push esi ; save ESI
push edi ; save EDI
pushfd ; push EFLAGS again so a copy can be read
pop eax ; EAX = current EFLAGS
mov ebx, eax ; keep the original flags in EBX
xor eax, 0x200000 ; flip bit 21 (0x200000 = 2097152)
push eax ; push the modified flags
popfd ; try to write them into EFLAGS
pushfd ; read EFLAGS back
pop eax ; EAX = flags as actually stored by the CPU
xor eax, ebx ; EAX = bits that differ from the original flags (0 if bit 21 did not change)
pop edi ; restore EDI
pop esi ; restore ESI
pop ebp ; restore EBP
pop ebx ; restore EBX
popfd ; restore the caller's EFLAGS
ret
附录二:x264_cpu_cpuid
;-----------------------------------------------------------------------------
; int x264_cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
; Executes CPUID with EAX = op and stores the resulting EAX, EBX, ECX and EDX
; through the four pointer arguments.
;-----------------------------------------------------------------------------
; NOTE(review): "0,6" presumably tells the cglobal macro to load no arguments
; into registers and to reserve 6 registers (covering EBX/ESI used below) -
; confirm against the macro definition.
cglobal x264_cpu_cpuid, 0,6
mov eax, r0m ; EAX = first argument (op), the CPUID leaf to query
cpuid ; the key instruction: fills EAX, EBX, ECX, EDX
mov esi, r1m ; ESI = second argument (pointer for the EAX result)
mov [esi], eax ; store EAX to the memory ESI points to (same pattern below)
mov esi, r2m ; ESI = third argument (pointer for the EBX result)
mov [esi], ebx
mov esi, r3m ; ESI = fourth argument (pointer for the ECX result)
mov [esi], ecx
mov esi, r4m ; ESI = fifth argument (pointer for the EDX result)
mov [esi], edx ; as above
RET