この日記はGNSで生成しています。 |
_ アルファ合成。ウチのK6-2マシンでいろいろ試したところ、別に「2ループ回したほうが速い」といった症状は出ずじまい。VRAMじゃなくてメインRAMでのテストなので条件が違うのだが、まず結果表。gccの一部の結果が変なのは、cygwinがかなり古いせいかちゃんとした時間が表示されなかったため。
CPU  FUNC      MS-C    BCC    gcc
a    trans01   3104   1953   2022
     trans02   2944   1963   1903
     trans03   2344   1051   1362
b    trans01   6480   5411     --
     trans02   5550   4744     --
     trans03   3460   3295     --
c    trans01  16593  19889  17655
     trans02  11917  13519  12287
     trans03   9133   9043  15322
d    trans01  17925  23114  22772
     trans02  11827  15712  15803
     trans03   9043  11086  16554

a: Pentium3-500E   (133x5.5=733MHz)
b: K6-2-400        ( 66x6.0=400MHz)
c: MMX Pentium-200 ( 66x3.0=200MHz)
d: Pentium-133     ( 66x2.0=133MHz)
_ コードはこんな感じ。trans01とtrans02のアセンブリリストは、2回目のループだけ取り出してみた。
/*
 * trans01 - alpha-blend two frames in two passes, one multiply per byte.
 *
 * Pass 1 copies src2 into dst; pass 2 moves every dst byte towards the
 * matching src1 byte:  dst[k] += (i * (src1[k] - dst[k])) >> 8.
 *
 * dst, src1, src2: buffers of 640*480*3 bytes each.
 * i:               blend weight, 0..255.
 *
 * NOTE(review): pass 1 accesses dst/src2 through unsigned int pointers, so
 * it assumes both buffers are suitably aligned for unsigned int - confirm
 * against the callers.
 */
void trans01( void *dst, void *src1, void *src2, unsigned char i )
{
    const int frame_bytes = 640 * 480 * 3;

    /* Pass 1: dst = src2, one unsigned int at a time. */
    {
        unsigned int *from = (unsigned int *)src2;
        unsigned int *to = (unsigned int *)dst;

        for (int n = (int)(frame_bytes / sizeof(unsigned int)); n; n--) {
            *to++ = *from++;
        }
    }

    /* Pass 2: blend src1 into dst byte by byte. */
    {
        unsigned char *in = (unsigned char *)src1;
        unsigned char *out = (unsigned char *)dst;

        for (int n = frame_bytes; n; n--) {
            /*
             * The pointers are advanced in separate statements.  The
             * original spelling, "*sl3++ += ... *sl3", modified and read
             * sl3 within one expression without sequencing.
             * The difference can be negative; ">>" on a negative int is
             * implementation-defined (the listings below all use SAR).
             */
            *out += (i * (*in - *out)) >> 8;
            in++;
            out++;
        }
    }

    /*
     * Inner loop of pass 2 as generated for the original spelling of the
     * statement.
     *
     * ===============================================================================
     * Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 12.00.8168 for 80x86
     * >cl -O2 -Og -Oi test.cpp
     *   00401085 8a08          mov cl,[eax]
     *   00401087 33d2          xor edx,edx
     *   00401089 8a1407        mov dl,[edi+eax]
     *   0040108c 8bd9          mov ebx,ecx
     *   0040108e 81e3ff000000  and ebx,000000ff
     *   00401094 2bd3          sub edx,ebx
     *   00401096 0fafd6        imul edx,esi
     *   00401099 c1fa08        sar edx,08
     *   0040109c 02d1          add dl,cl
     *   0040109e 8810          mov [eax],dl
     *   004010a0 40            inc eax
     *   004010a1 4d            dec ebp
     *   004010a2 75e1          jnz 401085
     * ===============================================================================
     * Borland C++ 5.0 for Win32 Copyright (c) 1993, 1996 Borland International
     * >bcc32 -O test.cpp
     *   004010f0 33db          xor ebx,ebx
     *   004010f2 8a1a          mov bl,[edx]
     *   004010f4 0fb630        movzx si,byte ptr [eax]
     *   004010f7 2bde          sub ebx,esi
     *   004010f9 0fb67514      movzx si,byte ptr [ebp+14]
     *   004010fd 0fafde        imul ebx,esi
     *   00401100 c1fb08        sar ebx,08
     *   00401103 0018          add [eax],bl
     *   00401105 42            inc edx
     *   00401106 40            inc eax
     *   00401107 49            dec ecx
     *   00401108 85c9          test ecx,ecx
     *   0040110a 75e4          jnz 4010f0
     * ===============================================================================
     * gcc driver version 2.7-B19 executing gcc version 2.7-97r2aBeta
     * >gcc -O3 test.cpp
     *   00401260 0fb606        movzx ax,byte ptr [esi]
     *   00401263 0fb611        movzx dx,byte ptr [ecx]
     *   00401266 29d0          sub eax,edx
     *   00401268 0fafc7        imul eax,edi
     *   0040126b c1f808        sar eax,08
     *   0040126e 0001          add [ecx],al
     *   00401270 46            inc esi
     *   00401271 41            inc ecx
     *   00401272 4b            dec ebx
     *   00401273 75eb          jnz 401260
     */
}

/*
 * trans02 - alpha-blend two frames in two passes, table lookup per byte.
 *
 * Same two-pass shape as trans01, but the per-byte multiply and shift are
 * replaced by a lookup:  dst[k] += a[src1[k] - dst[k]],
 * where a = &alphatbl[511*i + 256].
 *
 * dst, src1, src2: buffers of 640*480*3 bytes each.
 * i:               blend weight, 0..255; selects the 511-entry slice of
 *                  alphatbl.
 *
 * alphatbl is defined outside this block.  The index src1[k] - dst[k]
 * ranges over -255..255, so elements 511*i + 1 .. 511*i + 511 are read.
 * NOTE(review): presumably each slice holds the pre-computed scaled
 * difference for weight i - confirm against the table's initialisation.
 * NOTE(review): pass 1 assumes dst/src2 are aligned for unsigned int.
 */
void trans02( void *dst, void *src1, void *src2, unsigned char i )
{
    const int frame_bytes = 640 * 480 * 3;

    /* Pass 1: dst = src2, one unsigned int at a time. */
    {
        unsigned int *from = (unsigned int *)src2;
        unsigned int *to = (unsigned int *)dst;

        for (int n = (int)(frame_bytes / sizeof(unsigned int)); n; n--) {
            *to++ = *from++;
        }
    }

    /* Pass 2: blend src1 into dst through the lookup table. */
    {
        /* Centre of the slice for weight i, so it can take signed offsets. */
        unsigned char *a = &alphatbl[511*i+256];
        unsigned char *in = (unsigned char *)src1;
        unsigned char *out = (unsigned char *)dst;

        for (int n = frame_bytes; n; n--) {
            *out += a[*in - *out];
            in++;
            out++;
        }
    }

    /*
     * Inner loop of pass 2 as generated by each compiler.
     *
     * ===============================================================================
     * Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 12.00.8168 for 80x86
     * >cl -O2 -Og -Oi test.cpp
     *   004010f1 8a08          mov cl,[eax]
     *   004010f3 33d2          xor edx,edx
     *   004010f5 8a1407        mov dl,[edi+eax]
     *   004010f8 8bd9          mov ebx,ecx
     *   004010fa 81e3ff000000  and ebx,000000ff
     *   00401100 2bd3          sub edx,ebx
     *   00401102 8a1432        mov dl,[edx+esi]
     *   00401105 02d1          add dl,cl
     *   00401107 8810          mov [eax],dl
     *   00401109 40            inc eax
     *   0040110a 4d            dec ebp
     *   0040110b 75e4          jnz 4010f1
     * ===============================================================================
     * Borland C++ 5.0 for Win32 Copyright (c) 1993, 1996 Borland International
     * >bcc32 -O test.cpp
     *   0040114d 33db          xor ebx,ebx
     *   0040114f 8a1a          mov bl,[edx]
     *   00401151 0fb638        movzx di,byte ptr [eax]
     *   00401154 2bdf          sub ebx,edi
     *   00401156 8a1c1e        mov bl,[esi+ebx]
     *   00401159 0018          add [eax],bl
     *   0040115b 42            inc edx
     *   0040115c 40            inc eax
     *   0040115d 49            dec ecx
     *   0040115e 85c9          test ecx,ecx
     *   00401160 75eb          jnz 40114d
     * ===============================================================================
     * gcc driver version 2.7-B19 executing gcc version 2.7-97r2aBeta
     * >gcc -O3 test.cpp
     *   00401200 0fb606        movzx ax,byte ptr [esi]
     *   00401203 0fb611        movzx dx,byte ptr [ecx]
     *   00401206 29d0          sub eax,edx
     *   00401208 8b7dfc        mov edi,dword ptr [ebp-04]
     *   0040120b 8a0438        mov al,[eax+edi]
     *   0040120e 0001          add [ecx],al
     *   00401210 46            inc esi
     *   00401211 41            inc ecx
     *   00401212 4b            dec ebx
     *   00401213 75eb          jnz 401200
     */
}

/*
 * trans03 - alpha-blend two frames in a single pass, table lookup per byte.
 *
 * Writes dst[k] = src1[k] + a[src2[k] - src1[k]] with
 * a = &alphatbl[511*i + 256]; there is no preliminary copy pass.
 *
 * dst, src1, src2: buffers of 640*480*3 bytes each.
 * i:               blend weight, 0..255; selects the 511-entry slice of
 *                  alphatbl (defined outside this block).
 *
 * NOTE(review): the operands are the other way round from trans01/trans02,
 * which start from src2 and move towards src1; here the result starts from
 * src1 and moves towards src2.  Left as written - confirm which direction
 * is intended.
 */
void trans03( void *dst, void *src1, void *src2, unsigned char i )
{
    const int frame_bytes = 640 * 480 * 3;

    {
        /* Centre of the slice for weight i, so it can take signed offsets. */
        unsigned char *a = &alphatbl[511*i+256];
        unsigned char *base = (unsigned char *)src1;
        unsigned char *other = (unsigned char *)src2;
        unsigned char *out = (unsigned char *)dst;

        for (int n = frame_bytes; n; n--) {
            *out = *base + a[*other - *base];
            /*
             * Advance all three pointers.  The original loop never
             * incremented the src1 pointer, so every output byte was
             * blended against src1[0].
             */
            base++;
            other++;
            out++;
        }
    }

    /*
     * Inner loop as generated by each compiler.  These listings were
     * captured before the src1 pointer fix above: they show that pointer
     * being reloaded or reused but never incremented.
     *
     * ===============================================================================
     * Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 12.00.8168 for 80x86
     * >cl -O2 -Og -Oi test.cpp
     *   00401149 8b4c2418      mov ecx,dword ptr [esp+18]
     *   0040114d 33d2          xor edx,edx
     *   0040114f 8a10          mov dl,[eax]
     *   00401151 8a09          mov cl,[ecx]
     *   00401153 8bd9          mov ebx,ecx
     *   00401155 81e3ff000000  and ebx,000000ff
     *   0040115b 2bd3          sub edx,ebx
     *   0040115d 8a1432        mov dl,[edx+esi]
     *   00401160 02d1          add dl,cl
     *   00401162 881407        mov [edi+eax],dl
     *   00401165 40            inc eax
     *   00401166 4d            dec ebp
     *   00401167 75e0          jnz 401149
     * ===============================================================================
     * Borland C++ 5.0 for Win32 Copyright (c) 1993, 1996 Borland International
     * >bcc32 -O test.cpp
     *   00401191 33db          xor ebx,ebx
     *   00401193 8a18          mov bl,[eax]
     *   00401195 0fb63e        movzx di,byte ptr [esi]
     *   00401198 2bdf          sub ebx,edi
     *   0040119a 8b7dfc        mov edi,dword ptr [ebp-04]
     *   0040119d 8a1c1f        mov bl,[edi+ebx]
     *   004011a0 021e          add bl,[esi]
     *   004011a2 881a          mov [edx],bl
     *   004011a4 40            inc eax
     *   004011a5 42            inc edx
     *   004011a6 49            dec ecx
     *   004011a7 85c9          test ecx,ecx
     *   004011a9 75e6          jnz 401191
     * ===============================================================================
     * gcc driver version 2.7-B19 executing gcc version 2.7-97r2aBeta
     * >gcc -O3 test.cpp
     *   00401270 0fb606        movzx ax,byte ptr [esi]
     *   00401273 8b7d0c        mov edi,dword ptr [ebp+0c]
     *   00401276 0fb617        movzx dx,byte ptr [edi]
     *   00401279 29d0          sub eax,edx
     *   0040127b 8b7dfc        mov edi,dword ptr [ebp-04]
     *   0040127e 8a0438        mov al,[eax+edi]
     *   00401281 8b7d0c        mov edi,dword ptr [ebp+0c]
     *   00401284 0207          add al,[edi]
     *   00401286 8803          mov [ebx],al
     *   00401288 46            inc esi
     *   00401289 43            inc ebx
     *   0040128a 49            dec ecx
     *   0040128b 75e3          jnz 401270
     */
}
_
・・・長い?(笑)
メールはこちらへ...[後藤浩昭 / Hiroaki GOTO / GORRY / gorry@hauN.org]