0x1 加法与减法的优化原理
1.1 加法的识别与优化
加法优化有3种方案:
1)形式1:变量加变量
2)形式2:变量加常量
3)形式3:变量加1
C源代码:
// Addition sample: each assignment below exercises one of the three addition forms,
// and the result is printed after every step.
int _tmain(int argc, _TCHAR* argv[])
{
int nNum, nA = 8;
nNum = argc + nA; // Form 1: variable + variable
printf("%d\r\n",nNum);
nNum = argc + 9; // Form 2: variable + constant
printf("%d\r\n",nNum);
nNum = nNum + 1; // Form 3: variable + 1
printf("%d\r\n",nNum);
return 0;
}
Debug反汇编:
01297710 55 push ebp
01297711 8BEC mov ebp,esp
01297713 81EC D8000000 sub esp,0xD8
01297719 53 push ebx
0129771A 56 push esi ; 9-34.<ModuleEntryPoint>
0129771B 57 push edi
0129771C 8DBD 28FFFFFF lea edi,[local.54]
01297722 B9 36000000 mov ecx,0x36
01297727 B8 CCCCCCCC mov eax,0xCCCCCCCC
0129772C F3:AB rep stos dword ptr es:[edi]
0129772E C745 EC 08000 mov [local.5],0x8 ; nA = 8
01297735 8B45 08 mov eax,[arg.1] ; eax = argc 赋值
01297738 0345 EC add eax,[local.5] ; eax = eax + nA;形式1:nNum = argc + nA
0129773B 8945 F8 mov [local.2],eax ; nNum = eax
0129773E 8B45 F8 mov eax,[local.2] ; eax = nNum
01297741 50 push eax
01297742 68 508E3201 push 9-34.01328E50 ; %d\r\n
01297747 E8 C8C5FFFF call 9-34.01293D14 ; printf
0129774C 83C4 08 add esp,0x8 ; 堆栈平衡
0129774F 8B45 08 mov eax,[arg.1] ; eax = argc
01297752 83C0 09 add eax,0x9 ; eax = eax + 9;argc + 9;形式2:nNum = argc + 9
01297755 8945 F8 mov [local.2],eax ; nNum = eax
01297758 8B45 F8 mov eax,[local.2] ; eax = nNum
0129775B 50 push eax
0129775C 68 508E3201 push 9-34.01328E50 ; %d\r\n
01297761 E8 AEC5FFFF call 9-34.01293D14 ; printf
01297766 83C4 08 add esp,0x8
01297769 8B45 F8 mov eax,[local.2] ; eax = nNum
0129776C 83C0 01 add eax,0x1 ; eax = eax + 1;形式3:nNum = nNum+1
0129776F 8945 F8 mov [local.2],eax ; nNum = eax
01297772 8B45 F8 mov eax,[local.2] ; eax = nNum
01297775 50 push eax
01297776 68 508E3201 push 9-34.01328E50 ; %d\r\n
0129777B E8 94C5FFFF call 9-34.01293D14 ; printf
01297780 83C4 08 add esp,0x8
01297783 33C0 xor eax,eax
01297785 5F pop edi ; 9-34.<ModuleEntryPoint>
01297786 5E pop esi ; 9-34.<ModuleEntryPoint>
01297787 5B pop ebx ; 9-34.<ModuleEntryPoint>
01297788 81C4 D8000000 add esp,0xD8
0129778E 3BEC cmp ebp,esp
01297790 E8 94ABFFFF call 9-34.01292329
01297795 8BE5 mov esp,ebp
01297797 5D pop ebp ; 9-34.<ModuleEntryPoint>
01297798 C3 retn
Release反汇编:
00EB1260 push ebp
00EB1261 mov ebp,esp
00EB1263 push esi
00EB1264 mov esi,[arg.1] ; esi = argc
00EB1267 lea eax,dword ptr ds:[esi+0x8] ; eax = argc+nA;形式1:变量+变量;优化后的加法,精妙!
00EB126A push eax
00EB126B push 9-34.00EC64D8 ; %d\r\n
00EB1270 call 9-34.printf_initialize_sse2_sse2_from_osKeywordOncrt_st>; printf
00EB1275 add esi,0x9 ; esi = esi+9;argc+9;形式2:变量+常量
00EB1278 push esi
00EB1279 push 9-34.00EC64D8 ; %d\r\n
00EB127E call 9-34.printf_initialize_sse2_sse2_from_osKeywordOncrt_st>; printf
00EB1283 lea eax,dword ptr ds:[esi+0x1] ; eax = esi + 1;nNum+1;形式3:变量+1;直接用esi进行地址的存储
00EB1286 push eax
00EB1287 push 9-34.00EC64D8 ; %d\r\n
00EB128C call 9-34.printf_initialize_sse2_sse2_from_osKeywordOncrt_st>
00EB1291 add esp,0x18
00EB1294 xor eax,eax
00EB1296 pop esi ; 9-34.__argc_log10_table_tk_exit_tableate
00EB1297 pop ebp ; 9-34.__argc_log10_table_tk_exit_tableate
00EB1298 retn
小结:
1)变量+变量 = lea Reg32,[变量+变量](本例中nA被常量传播为8,实际生成的是 lea eax,[esi+0x8])
2)变量+常量 = add 变量,常量
3)变量+1 = lea Reg32,[变量+1]
注:这里我得到的代码和书里不一样,书中是【变量+1 = inc 变量】
1.2 减法的识别与优化
减法优化与加法大同小异。
C源代码:
// Subtraction sample: each assignment below exercises one of the three subtraction forms,
// and the result is printed after every step.
int _tmain(int argc, _TCHAR* argv[])
{
int nNum, nA = 8;
nNum = argc - nA; // Form 1: variable - variable
printf("%d\r\n",nNum);
nNum = argc - 9; // Form 2: variable - constant
printf("%d\r\n",nNum);
nNum = nNum - 1; // Form 3: variable - 1
printf("%d\r\n",nNum);
return 0;
}
Debug反汇编:
00E07710 push ebp
00E07711 mov ebp,esp
00E07713 sub esp,0xD8
00E07719 push ebx
00E0771A push esi ; 9-34.<ModuleEntryPoint>
00E0771B push edi ; 9-34.<ModuleEntryPoint>
00E0771C lea edi,[local.54]
00E07722 mov ecx,0x36
00E07727 mov eax,0xCCCCCCCC
00E0772C rep stos dword ptr es:[edi]
00E0772E>mov [local.5],0x8 ; nA = 8
00E07735 mov eax,[arg.1] ; eax = argc
00E07738 sub eax,[local.5] ; eax = eax - nA; eax = argc-nA;形式1:变量-变量
00E0773B mov [local.2],eax ; nNum = eax
00E0773E mov eax,[local.2] ; kernel32.BaseThreadInitThunk
00E07741 push eax
00E07742 push 9-34.00E98E50 ; %d\r\n
00E07747 call 9-34.00E03D14 ; printf
00E0774C add esp,0x8 ; 调用者堆栈平衡
00E0774F mov eax,[arg.1] ; eax = argc
00E07752 sub eax,0x9 ; eax = eax - 9;argc-9;形式2:变量-常量
00E07755 mov [local.2],eax ; nNum = eax
00E07758 mov eax,[local.2] ; kernel32.BaseThreadInitThunk
00E0775B push eax
00E0775C push 9-34.00E98E50 ; %d\r\n
00E07761 call 9-34.00E03D14 ; printf
00E07766 add esp,0x8
00E07769 mov eax,[local.2] ; eax = nNum
00E0776C sub eax,0x1 ; eax = eax - 1;nNum=nNum-1;形式3:变量-1
00E0776F mov [local.2],eax ; nNum = eax
00E07772 mov eax,[local.2] ; kernel32.BaseThreadInitThunk
00E07775 push eax
00E07776 push 9-34.00E98E50 ; %d\r\n
00E0777B call 9-34.00E03D14 ; printf
00E07780 add esp,0x8
00E07783 xor eax,eax
00E07785 pop edi ; kernel32.74EE38F4
00E07786 pop esi ; kernel32.74EE38F4
00E07787 pop ebx ; kernel32.74EE38F4
00E07788 add esp,0xD8
00E0778E cmp ebp,esp
00E07790 call 9-34.00E02329
00E07795 mov esp,ebp
00E07797 pop ebp ; kernel32.74EE38F4
00E07798 retn
Release反汇编:
012A1260 push ebp
012A1261 mov ebp,esp
012A1263 push esi ;9-34.<ModuleEntryPoint>
012A1264 mov esi,[arg.1] ;esi = argc
012A1267 lea eax,dword ptr ds:[esi-0x8];eax = esi-8;argc-nA(nA=8);形式1:变量-变量
012A126A push eax
012A126B push 9-34.012B64D8 ;%d\r\n
012A1270 call 9-34.printf>; printf
012A1275 add esi,-0x9 ;esi = esi-9;argc-9;形式2:变量-常量;-0x9是补码
012A1278 push esi ;9-34.<ModuleEntryPoint>
012A1279 push 9-34.012B64D8 ;%d\r\n
012A127E call 9-34.printf>
012A1283 lea eax,dword ptr ds:[esi-0x1];eax = esi-1;nNum-1;形式3:变量-1
012A1286 push eax
012A1287 push 9-34.012B64D8 ;%d\r\n
012A128C call 9-34.printf>
012A1291 add esp,0x18
012A1294 xor eax,eax
012A1296 pop esi ;kernel32.74EE38F4
012A1297 pop ebp ;kernel32.74EE38F4
012A1298 retn
小结:
1)变量-变量 = lea Reg32,[变量-变量]
2)变量-常量 = add 变量,-常量(负数以补码形式表示)
3)变量-1 = lea Reg32,[变量-1]
注:这里的代码我和书里不一样,书中是【dec 变量】
0x2 乘法与除法的优化原理
左右移位就相当于在做乘除计算,左移1位相当于乘以2,右移1位相当于除以2。
2.1 乘法
2.1.1 乘法的位移优化
当乘数为2的整数次方、且大于8时,编译器才会使用优化。
c源代码:
// Multiplication sample: multiplies argc by a local whose value is 16 (a power of two)
// and prints the product.
int _tmain(int argc, _TCHAR* argv[])
{
int nNum = 16;
// NOTE(review): "%p" is given an int argument here, while the C standard expects a
// void* for %p. Left unchanged because the listings after this sample show the same
// "%p" format string.
printf("%p",nNum*argc);
return 0;
}
Debug汇编
008D7710 push ebp
008D7711 mov ebp,esp
008D7713 sub esp,0xCC
008D7719 push ebx
008D771A push esi ; 9-35.<ModuleEntryPoint>
008D771B push edi ; 9-35.<ModuleEntryPoint>
008D771C lea edi,[local.51]
008D7722 mov ecx,0x33
008D7727 mov eax,0xCCCCCCCC
008D772C rep stos dword ptr es:[edi]
008D772E mov [local.2],0x10 ; 局部变量nNum = 0x10(即十进制16)
008D7735 mov eax,[local.2] ; eax = 局部变量nNum
008D7738 imul eax,[arg.1] ; eax = eax*argc;乘法运算
008D773C push eax
008D773D push 9-35.00968E50 ; %p
008D7742 call 9-35.008D3D14 ; printf
008D7747 add esp,0x8
008D774A xor eax,eax
008D774C pop edi ; kernel32.74EE38F4
008D774D pop esi ; kernel32.74EE38F4
008D774E pop ebx ; kernel32.74EE38F4
008D774F add esp,0xCC
008D7755 cmp ebp,esp
008D7757 call 9-35.008D2329
008D775C mov esp,ebp
008D775E pop ebp ; kernel32.74EE38F4
008D775F retn
Release汇编:
000E1260 push ebp
000E1261 mov ebp,esp
000E1263 mov eax,[arg.1]
000E1266 shl eax,0x4 ; 左移0x4位,相当于乘以16;shl逻辑左移指令[2^4=16]
000E1269 push eax
000E126A push 9-35.000F64D8 ; %p
000E126F call 9-35.printf>
000E1274 add esp,0x8
000E1277 xor eax,eax
000E1279 pop ebp ; kernel32.74EE38F4
000E127A retn
测试:
1)如果乘数比2的次方多1(例如17),会采用移位加加法
2)如果乘数比2的次方少1(例如15),会采用移位加减法
2.1.2 乘法的lea指令优化
C源代码:
// lea multiplication sample: six expressions combining argc, the local a (initialised
// to 1) and small constants; all six results are printed by a single printf call.
int _tmain(int argc, _TCHAR* argv[])
{
int a = 1, b, c, d, e, f, g;
b = argc+a*4+6; // Form 1: variable + variable*4 + constant
c = argc+a*3+6; // Form 2: variable + variable*3 + constant
d = argc*2; // Form 3: variable * 2
e = argc*3; // Form 4: variable * 3
f = argc*4; // Form 5: variable * 4
g = argc*11; // Form 6: variable * 11
printf("%d %d %d %d %d %d", b, c, d, e, f, g);
return 0;
}
Debug汇编:
00DC7710 push ebp
00DC7711 mov ebp,esp
00DC7713 sub esp,0x114
00DC7719 push ebx
00DC771A push esi ; 9-36.<ModuleEntryPoint>
00DC771B push edi
00DC771C lea edi,[local.69]
00DC7722 mov ecx,0x45
00DC7727 mov eax,0xCCCCCCCC
00DC772C rep stos dword ptr es:[edi]
00DC772E>mov [local.2],0x1 ; 给局部变量a赋值
00DC7735 mov eax,[local.2] ; eax = a
00DC7738 mov ecx,[arg.1] ; ecx = argc
00DC773B lea edx,dword ptr ds:[ecx+eax*4+0x6]; edx = argc+a*4+6
00DC773F mov [local.5],edx ; 局部变量b = edx
00DC7742 imul eax,[local.2],0x3 ; 先做a*3
00DC7746 mov ecx,[arg.1] ; ecx = argc
00DC7749 lea edx,dword ptr ds:[ecx+eax+0x6] ; edx = 【ecx+eax+0x6】;edx = argc + (a*3) + 0x6
00DC774D mov [local.8],edx ; 局部变量c = edx
00DC7750 mov eax,[arg.1] ; eax = argc
00DC7753 shl eax,1 ; argc*2;左移1位,相当于乘以2
00DC7755 mov [local.11],eax ; 赋值给局部变量d
00DC7758 imul eax,[arg.1],0x3 ; 使用乘法指令;argc*3
00DC775C mov [local.14],eax ; 局部变量e = eax
00DC775F mov eax,[arg.1] ; eax = argc
00DC7762 shl eax,0x2 ; eax = eax*4;左移2位等于乘以4
00DC7765 mov [local.17],eax ; 局部变量f = eax
00DC7768 imul eax,[arg.1],0xB ; eax = argc*11;0xB等于十进制的11
00DC776C mov [local.20],eax ; 局部变量g = eax
00DC776F mov eax,[local.20]
00DC7772 push eax
00DC7773 mov ecx,[local.17]
00DC7776 push ecx
00DC7777 mov edx,[local.14]
00DC777A push edx
00DC777B mov eax,[local.11]
00DC777E push eax
00DC777F mov ecx,[local.8]
00DC7782 push ecx
00DC7783 mov edx,[local.5]
00DC7786 push edx
00DC7787 push 9-36.00E58E50 ; %d %d %d %d %d %d
00DC778C call 9-36.00DC3D14 ; printf
00DC7791 add esp,0x1C
00DC7794 xor eax,eax
Release版反汇编代码:
01271000 push ebp
01271001 mov ebp,esp
01271003 mov ecx,[arg.1] ; ecx = argc;argc=1
01271006 imul eax,ecx,0xB ; g = argc*11
01271009 push eax
0127100A lea eax,dword ptr ds:[ecx*4] ; f = argc*4
01271011 push eax
01271012 lea eax,dword ptr ds:[ecx+ecx*2]; e = argc*3;argc=1+1*2
01271015 push eax
01271016 lea eax,dword ptr ds:[ecx+ecx] ; d = argc*2;优化成了加法 1+1
01271019 push eax
0127101A lea eax,dword ptr ds:[ecx+0x9] ; c = argc+9; 把a*3+6计算完毕;
0127101D push eax
0127101E lea eax,dword ptr ds:[ecx+0xA] ; b = argc+10; 把a*4+6计算完毕;
01271021 push eax
01271022 push 9-36.01286448 ; ASCII 25,"d %d %d %d %d %d"
01271027 call 9-36.printf_>
0127102C add esp,0x1C
0127102F xor eax,eax
01271031 pop ebp
01271032 retn
2.1.3 除法与倒数相乘
编译器世界中倒数相乘的中心思想其实就是用乘法来代替除法运算。它的原理很简单,就是将被除数乘以除数的倒数,其公式为x/y = x*(1/y),我们拿10/2作为例子,可以得出以下推论:
由公式 x/y = x*(1/y) 可得 10/2 = 10*(1/2) = 10*0.5
这里没有写反汇编后的状态
2.1.4 倒数相乘与定点运算的配合
理论知识,没仔细去看
2.2 除法
2.2.1 除法的识别与优化
这一章虽然讲了编译器对除法的优化,但是不做笔记。
2.2.2 取模运算的识别与优化
不理解,略过,需要用时回头再看
小结
普通除法
mov Reg32_1,XXXXXXXXh
imul Reg32_2 (被除数)
...
mov Reg32_1,edx
shr Reg32_1,1Fh
add Reg32_1,edx
应用计算结果(如果倒数被向上圆整了,那么根据sar指令后的数值向下圆整即可)如下:
(除数的倒数 ≈ XXXXXXXXh*2^-32,即 除数 ≈ 2^32/XXXXXXXXh;若省略处还有 sar edx,N,则 除数 ≈ 2^(32+N)/XXXXXXXXh)
除数为2的次方
mov eax,(被除数)
cdq
and edx,XXh
add eax,edx
sar eax,YYh
应用计算结果如下:
(除数 = XXh + 1 或 2^YYh)
标签:nNum,argc,系统安全,esp,mov,eax,push,减法,除法 From: https://blog.csdn.net/ztc131450/article/details/143866834