Navigation Log - なびろぐ -
2000/07/18

...X680x0 spirit inside...

この日記はGNSで生成しています。
この日記の画像はOPTPiX webDesigner Ver.4で生成しています。

hauN
Go amazon.co.jp

■ご注文リストから■ [一覧]
■Amazonギフト券■ [購入]
ゆゆ式 1巻 []
ゆゆ式 1巻
Amazonほしい物リスト


2000/07/18 (火) 晴れ

アルファ合成コード

_ アルファ合成。ウチのK6-2マシンでいろいろ試したところ、別に「2ループ回したほうが速い」といった症状は出ずじまい。VRAMじゃなくてメインRAMでのテストなので条件が違うのだが、まず結果表。gccの一部の結果が変なのは、cygwinがかなり古いせいかちゃんとした時間が表示されなかったため。


CPU     FUNC   MS-C    BCC    gcc
  a  trans01   3104   1953   2022
     trans02   2944   1963   1903
     trans03   2344   1051   1362
  b  trans01   6480   5411  --
     trans02   5550   4744  --
     trans03   3460   3295  --
  c  trans01  16593  19889  17655
     trans02  11917  13519  12287
     trans03   9133   9043  15322
  d  trans01  17925  23114  22772
     trans02  11827  15712  15803
     trans03   9043  11086  16554

a: Pentium3-500E   (133x5.5=733MHz)
b: K6-2-400        ( 66x6.0=400MHz)
c: MMX Pentium-200 ( 66x3.0=200MHz)
d: Pentium-133     ( 66x2.0=133MHz)

_ コードはこんな感じ。trans01の逆アセンブルリストは2回目のループ(合成ループ)だけ取り出してみた。


/*
 * trans01 - two-pass alpha blend over 640*480*3-byte buffers, using a
 * multiply and shift per byte.
 *
 * Pass 1 copies src2 into dst one unsigned int at a time.
 * Pass 2 moves every byte of dst toward the matching byte of src1:
 *
 *     dst[k] = (unsigned char)(src2[k] + ((i * (src1[k] - src2[k])) >> 8))
 *
 * dst  : output buffer, 640*480*3 bytes are written.
 * src1 : buffer blended toward, 640*480*3 bytes are read.
 * src2 : base buffer copied into dst first, 640*480*3 bytes are read.
 * i    : blend weight; 0 leaves dst equal to src2.
 *
 * Pass 1 accesses dst and src2 through unsigned int pointers, so both must
 * be suitably aligned for unsigned int.
 * The product i * (src1[k] - src2[k]) can be negative; right-shifting a
 * negative int is implementation-defined in C. The compiler listings kept
 * below all use an arithmetic shift (sar).
 */
void trans01( void *dst, void *src1, void *src2, unsigned char i )
{
	/* Pass 1: dst = src2, copied word by word. */
	{
		register unsigned int *sl1,*sl3;
		sl1=(unsigned int *)(src2);
		sl3=(unsigned int *)(dst);
		for(register int y=640*480*3/sizeof(unsigned int);y;y--) *sl3++ = *sl1++;
	}
	/* Pass 2: blend each byte of dst toward src1 with weight i. */
	{
		register unsigned char *sl2,*sl3;
		sl2=(unsigned char *)(src1);
		sl3=(unsigned char *)(dst);
		for(register int y=640*480*3;y;y--) {
			/* sl3 is advanced in its own statement. Writing
			   "*sl3++ += ... *sl3" would modify sl3 and read it again
			   with no sequencing between the two, which is undefined. */
			*sl3 += ((unsigned char)i*(*sl2++ - *sl3))>>8;
			sl3++;
		}
	}
/*
Disassembly of the pass 2 loop only, as produced by each compiler.

===============================================================================
Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 12.00.8168 for 80x86
>cl -O2 -Og -Oi test.cpp
00401085 8a08               mov     cl,[eax]
00401087 33d2               xor     edx,edx
00401089 8a1407             mov     dl,[edi+eax]
0040108c 8bd9               mov     ebx,ecx
0040108e 81e3ff000000       and     ebx,000000ff
00401094 2bd3               sub     edx,ebx
00401096 0fafd6             imul    edx,esi
00401099 c1fa08             sar     edx,08
0040109c 02d1               add     dl,cl
0040109e 8810               mov     [eax],dl
004010a0 40                 inc     eax
004010a1 4d                 dec     ebp
004010a2 75e1               jnz     401085

===============================================================================
Borland C++ 5.0 for Win32 Copyright (c) 1993, 1996 Borland International
>bcc32 -O test.cpp
004010f0 33db               xor     ebx,ebx
004010f2 8a1a               mov     bl,[edx]
004010f4 0fb630             movzx   si,byte ptr [eax]
004010f7 2bde               sub     ebx,esi
004010f9 0fb67514           movzx   si,byte ptr [ebp+14]
004010fd 0fafde             imul    ebx,esi
00401100 c1fb08             sar     ebx,08
00401103 0018               add     [eax],bl
00401105 42                 inc     edx
00401106 40                 inc     eax
00401107 49                 dec     ecx
00401108 85c9               test    ecx,ecx
0040110a 75e4               jnz     4010f0

===============================================================================
gcc driver version 2.7-B19 executing gcc version 2.7-97r2aBeta
>gcc -O3 test.cpp
00401260 0fb606             movzx   ax,byte ptr [esi]
00401263 0fb611             movzx   dx,byte ptr [ecx]
00401266 29d0               sub     eax,edx
00401268 0fafc7             imul    eax,edi
0040126b c1f808             sar     eax,08
0040126e 0001               add     [ecx],al
00401270 46                 inc     esi
00401271 41                 inc     ecx
00401272 4b                 dec     ebx
00401273 75eb               jnz     401260
*/

}

/*
 * trans02 - two-pass blend over 640*480*3-byte buffers, using a table lookup
 * in place of the multiply and shift.
 *
 * Pass 1 copies src2 into dst one unsigned int at a time.
 * Pass 2 adds alphatbl[511*i + 256 + (src1[k] - dst[k])] to every dst[k].
 *
 * dst  : output buffer, 640*480*3 bytes are written.
 * src1 : buffer whose bytes supply the difference used as the table index.
 * src2 : base buffer copied into dst first.
 * i    : selects the 511-entry row of alphatbl.
 *
 * NOTE(review): alphatbl is defined outside this listing. Presumably entry
 * [511*i + 256 + d] holds (i*d)>>8 for d in -255..255; confirm against the
 * code that builds the table.
 */
void trans02( void *dst, void *src1, void *src2, unsigned char i )
{
	enum { BYTES = 640*480*3 };

	/* Pass 1: word-wise copy of src2 into dst. */
	{
		const unsigned int *from = (const unsigned int *)src2;
		unsigned int *to = (unsigned int *)dst;
		int words = (int)(BYTES/sizeof(unsigned int));
		int w;

		for (w = 0; w < words; w++) {
			to[w] = from[w];
		}
	}

	/* Pass 2: per-byte table lookup, indexed by the signed difference. */
	{
		/* Row pointer is offset so that indices -255..255 can be used. */
		unsigned char *row = &alphatbl[511*i+256];
		const unsigned char *target = (const unsigned char *)src1;
		unsigned char *out = (unsigned char *)dst;
		int k;

		for (k = 0; k < BYTES; k++) {
			int delta = target[k] - out[k];
			out[k] += row[delta];
		}
	}
/*
Disassembly of the pass 2 loop only, as produced by each compiler.

===============================================================================
Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 12.00.8168 for 80x86
>cl -O2 -Og -Oi test.cpp
004010f1 8a08               mov     cl,[eax]
004010f3 33d2               xor     edx,edx
004010f5 8a1407             mov     dl,[edi+eax]
004010f8 8bd9               mov     ebx,ecx
004010fa 81e3ff000000       and     ebx,000000ff
00401100 2bd3               sub     edx,ebx
00401102 8a1432             mov     dl,[edx+esi]
00401105 02d1               add     dl,cl
00401107 8810               mov     [eax],dl
00401109 40                 inc     eax
0040110a 4d                 dec     ebp
0040110b 75e4               jnz     4010f1

===============================================================================
Borland C++ 5.0 for Win32 Copyright (c) 1993, 1996 Borland International
>bcc32 -O test.cpp
0040114d 33db               xor     ebx,ebx
0040114f 8a1a               mov     bl,[edx]
00401151 0fb638             movzx   di,byte ptr [eax]
00401154 2bdf               sub     ebx,edi
00401156 8a1c1e             mov     bl,[esi+ebx]
00401159 0018               add     [eax],bl
0040115b 42                 inc     edx
0040115c 40                 inc     eax
0040115d 49                 dec     ecx
0040115e 85c9               test    ecx,ecx
00401160 75eb               jnz     40114d

===============================================================================
gcc driver version 2.7-B19 executing gcc version 2.7-97r2aBeta
>gcc -O3 test.cpp
00401200 0fb606             movzx   ax,byte ptr [esi]
00401203 0fb611             movzx   dx,byte ptr [ecx]
00401206 29d0               sub     eax,edx
00401208 8b7dfc             mov     edi,dword ptr [ebp-04]
0040120b 8a0438             mov     al,[eax+edi]
0040120e 0001               add     [ecx],al
00401210 46                 inc     esi
00401211 41                 inc     ecx
00401212 4b                 dec     ebx
00401213 75eb               jnz     401200
*/
}

/*
 * trans03 - single-pass table-lookup blend over 640*480*3-byte buffers.
 *
 * For every byte k:
 *
 *     dst[k] = (unsigned char)(src1[k]
 *                  + alphatbl[511*i + 256 + (src2[k] - src1[k])])
 *
 * dst  : output buffer, 640*480*3 bytes are written.
 * src1 : base buffer, 640*480*3 bytes are read.
 * src2 : buffer whose difference from src1 indexes the table.
 * i    : selects the 511-entry row of alphatbl.
 *
 * NOTE(review): alphatbl is defined outside this listing. Presumably entry
 * [511*i + 256 + d] holds (i*d)>>8 for d in -255..255; confirm against the
 * code that builds the table.
 * NOTE(review): trans01 and trans02 start from src2 and move toward src1,
 * while this function starts from src1 and moves toward src2. Confirm which
 * direction is intended before comparing their outputs.
 */
void trans03( void *dst, void *src1, void *src2, unsigned char i )
{
	{
		/* Row pointer is offset so that indices -255..255 can be used. */
		unsigned char *a = &alphatbl[511*i+256];
		register unsigned char *sl1, *sl2,*sl3;
		sl1=(unsigned char *)(src1);
		sl2=(unsigned char *)(src2);
		sl3=(unsigned char *)(dst);
		/* All three pointers advance together. Without sl1++ every output
		   byte would be blended against src1[0]. */
		for(register int y=640*480*3;y;y--) {*sl3 = *sl1+a[*sl2++ - *sl1]; sl1++; sl3++; }
	}
/*
Disassembly of the loop, as produced by each compiler. These listings were
captured from a build in which sl1 was not advanced inside the loop: the src1
pointer is reloaded or reused each iteration and never incremented.

===============================================================================
Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 12.00.8168 for 80x86
>cl -O2 -Og -Oi test.cpp
00401149 8b4c2418           mov     ecx,dword ptr [esp+18]
0040114d 33d2               xor     edx,edx
0040114f 8a10               mov     dl,[eax]
00401151 8a09               mov     cl,[ecx]
00401153 8bd9               mov     ebx,ecx
00401155 81e3ff000000       and     ebx,000000ff
0040115b 2bd3               sub     edx,ebx
0040115d 8a1432             mov     dl,[edx+esi]
00401160 02d1               add     dl,cl
00401162 881407             mov     [edi+eax],dl
00401165 40                 inc     eax
00401166 4d                 dec     ebp
00401167 75e0               jnz     401149

===============================================================================
Borland C++ 5.0 for Win32 Copyright (c) 1993, 1996 Borland International
>bcc32 -O test.cpp
00401191 33db               xor     ebx,ebx
00401193 8a18               mov     bl,[eax]
00401195 0fb63e             movzx   di,byte ptr [esi]
00401198 2bdf               sub     ebx,edi
0040119a 8b7dfc             mov     edi,dword ptr [ebp-04]
0040119d 8a1c1f             mov     bl,[edi+ebx]
004011a0 021e               add     bl,[esi]
004011a2 881a               mov     [edx],bl
004011a4 40                 inc     eax
004011a5 42                 inc     edx
004011a6 49                 dec     ecx
004011a7 85c9               test    ecx,ecx
004011a9 75e6               jnz     401191

===============================================================================
gcc driver version 2.7-B19 executing gcc version 2.7-97r2aBeta
>gcc -O3 test.cpp
00401270 0fb606             movzx   ax,byte ptr [esi]
00401273 8b7d0c             mov     edi,dword ptr [ebp+0c]
00401276 0fb617             movzx   dx,byte ptr [edi]
00401279 29d0               sub     eax,edx
0040127b 8b7dfc             mov     edi,dword ptr [ebp-04]
0040127e 8a0438             mov     al,[eax+edi]
00401281 8b7d0c             mov     edi,dword ptr [ebp+0c]
00401284 0207               add     al,[edi]
00401286 8803               mov     [ebx],al
00401288 46                 inc     esi
00401289 43                 inc     ebx
0040128a 49                 dec     ecx
0040128b 75e3               jnz     401270

*/
}

_ ・・・長い?(笑)


[URL] [View Log()] [Trackback]
Name: Comment:



メールはこちらへ...[後藤浩昭 / Hiroaki GOTO / GORRY / gorry@hauN.org]

「表紙へ戻る」
「会議室」 「最新版」 「なびろぐindex」 「GNSソースを読む」