我目前有两种检查数字是否为质数的方法,以及另一种计算所需时间的方法。

IsPrime1:

bool IsPrime1(int i)
{
    if (i == 2 || i == 3 || i == 5 || i == 7) return true;
    return i % 2 != 0 && i % 3 != 0 && i % 5 != 0 && i % 7 != 0;
}


IsPrime2:

bool IsPrime2(int i)
{
    if (i == 2 || i == 3 || i == 5 || i == 7) return true;
    if (i % 2 == 0) return false;
    if (i % 3 == 0) return false;
    if (i % 5 == 0) return false;
    return i % 7 != 0;
}


CheckForTicks:

string CheckForTicks(int ticks)
{
    var sw1 = Stopwatch.StartNew();
    for (var g = 0; g < ticks; g++)
    {
        var b = IsPrime1(g);
    }
    sw1.Stop();

    var sw2 = Stopwatch.StartNew();
    for (var g = 0; g < ticks; g++)
    {
        var b = IsPrime2(g);
    }
    sw2.Stop();

    return $"{ticks} ticks: IsPrime1: {sw1.ElapsedMilliseconds} ms / IsPrime2: {sw2.ElapsedMilliseconds} ms";
    //equal to the following:
    //return string.Format("{0} ticks: IsPrime1: {1} ms / IsPrime2: {2} ms", ticks, sw1.ElapsedMilliseconds, sw2.ElapsedMilliseconds);
}


结果:

| CheckForTicks | IsPrime1 (in ms) | IsPrime2 (in ms) |
|---------------|------------------|------------------|
|        100000 |                3 |                4 |
|        500000 |               18 |               21 |
|       1000000 |               37 |               45 |
|       5000000 |              221 |              242 |
|      10000000 |              402 |              499 |
|      50000000 |             2212 |             2320 |
|     100000000 |             4377 |             4676 |
|     500000000 |            22125 |            23786 |


我想知道的是,为什么IsPrime2甚至比IsPrime1还要慢一些。
从我的角度来看,IsPrime2应该比IsPrime1快得多,因为它只需在第一个可能的returnIsPrime1检查所有可能性之前进行一次检查。
有什么我不知道的东西,或者与.NET有关吗?

如果有人可以向我解释其原因,我将不胜感激。

提前致谢!

PS:我正在使用Visual Studio 2015 RC.NET 4.6并在Debug模式下运行它。

最佳答案

让我们比较一下IL代码:


IsPrime1

.method private hidebysig instance bool  IsPrime1(int32 i) cil managed
{
  // Code size       45 (0x2d)
  .maxstack  8
  IL_0000:  ldarg.1
  IL_0001:  ldc.i4.2
  IL_0002:  beq.s      IL_0010
  IL_0004:  ldarg.1
  IL_0005:  ldc.i4.3
  IL_0006:  beq.s      IL_0010
  IL_0008:  ldarg.1
  IL_0009:  ldc.i4.5
  IL_000a:  beq.s      IL_0010
  IL_000c:  ldarg.1
  IL_000d:  ldc.i4.7
  IL_000e:  bne.un.s   IL_0012
  IL_0010:  ldc.i4.1
  IL_0011:  ret
  IL_0012:  ldarg.1
  IL_0013:  ldc.i4.2
  IL_0014:  rem
  IL_0015:  brfalse.s  IL_002b
  IL_0017:  ldarg.1
  IL_0018:  ldc.i4.3
  IL_0019:  rem
  IL_001a:  brfalse.s  IL_002b
  IL_001c:  ldarg.1
  IL_001d:  ldc.i4.5
  IL_001e:  rem
  IL_001f:  brfalse.s  IL_002b
  IL_0021:  ldarg.1
  IL_0022:  ldc.i4.7
  IL_0023:  rem
  IL_0024:  ldc.i4.0
  IL_0025:  ceq
  IL_0027:  ldc.i4.0
  IL_0028:  ceq
  IL_002a:  ret
  IL_002b:  ldc.i4.0
  IL_002c:  ret
} // end of method Program::IsPrime1

IsPrime2

.method private hidebysig instance bool  IsPrime2(int32 i) cil managed
{
  // Code size       49 (0x31)
  .maxstack  8
  IL_0000:  ldarg.1
  IL_0001:  ldc.i4.2
  IL_0002:  beq.s      IL_0010
  IL_0004:  ldarg.1
  IL_0005:  ldc.i4.3
  IL_0006:  beq.s      IL_0010
  IL_0008:  ldarg.1
  IL_0009:  ldc.i4.5
  IL_000a:  beq.s      IL_0010
  IL_000c:  ldarg.1
  IL_000d:  ldc.i4.7
  IL_000e:  bne.un.s   IL_0012
  IL_0010:  ldc.i4.1
  IL_0011:  ret
  IL_0012:  ldarg.1
  IL_0013:  ldc.i4.2
  IL_0014:  rem
  IL_0015:  brtrue.s   IL_0019
  IL_0017:  ldc.i4.0
  IL_0018:  ret
  IL_0019:  ldarg.1
  IL_001a:  ldc.i4.3
  IL_001b:  rem
  IL_001c:  brtrue.s   IL_0020
  IL_001e:  ldc.i4.0
  IL_001f:  ret
  IL_0020:  ldarg.1
  IL_0021:  ldc.i4.5
  IL_0022:  rem
  IL_0023:  brtrue.s   IL_0027
  IL_0025:  ldc.i4.0
  IL_0026:  ret
  IL_0027:  ldarg.1
  IL_0028:  ldc.i4.7
  IL_0029:  rem
  IL_002a:  ldc.i4.0
  IL_002b:  ceq
  IL_002d:  ldc.i4.0
  IL_002e:  ceq
  IL_0030:  ret
} // end of method Program::IsPrime2



两者的第一部分是相同的:

  .maxstack  8
  IL_0000:  ldarg.1
  IL_0001:  ldc.i4.2
  IL_0002:  beq.s      IL_0010
  IL_0004:  ldarg.1
  IL_0005:  ldc.i4.3
  IL_0006:  beq.s      IL_0010
  IL_0008:  ldarg.1
  IL_0009:  ldc.i4.5
  IL_000a:  beq.s      IL_0010
  IL_000c:  ldarg.1
  IL_000d:  ldc.i4.7
  IL_000e:  bne.un.s   IL_0012
  IL_0010:  ldc.i4.1
  IL_0011:  ret


毫不奇怪,这匹配:

if (i == 2 || i == 3 || i == 5 || i == 7) return true;


其余代码是等效的,但是编译器为IsPrime1生成了较短的代码。

IL_0012:  ldarg.1              // Push i
IL_0013:  ldc.i4.2             // Push 2
IL_0014:  rem                  // Pop these and push i % 2
IL_0015:  brfalse.s  IL_002b   // Go to IL_002b if the result is 0
...                            // Repeat the same pattern for 3, 5 and 7
IL_002b:  ldc.i4.0             // Push 0 (false)
IL_002c:  ret                  // Return


这是IsPrime2中的同一部分:

IL_0012:  ldarg.1              // Push i
IL_0013:  ldc.i4.2             // Push 2
IL_0014:  rem                  // Pop these and push i % 2
IL_0015:  brtrue.s   IL_0019   // Go to IL_0019 if the result is not 0
IL_0017:  ldc.i4.0             // Else load 0 (false)
IL_0018:  ret                  // ... and return
IL_0019:  ...                  // Here's the next condition
...


如您所见,return false代码在IsPrime2中重复了几次,但在IsPrime1情况下是要考虑的。较短的代码意味着更少的加载和处理指令,进而意味着更少的开销和更少的处理时间。

现在,JIT呢?它是否优化了其中任何一项?


IsPrime1 x86

            return i % 2 != 0 && i % 3 != 0 && i % 5 != 0 && i % 7 != 0;
00000022  mov         eax,esi
00000024  and         eax,80000001h
00000029  jns         00000030
0000002b  dec         eax
0000002c  or          eax,0FFFFFFFEh
0000002f  inc         eax
00000030  test        eax,eax
00000032  je          00000061
00000034  mov         eax,esi
00000036  mov         ecx,3
0000003b  cdq
0000003c  idiv        eax,ecx
0000003e  test        edx,edx
00000040  je          00000061
00000042  mov         eax,esi
00000044  lea         ecx,[ecx+2]
00000047  cdq
00000048  idiv        eax,ecx
0000004a  test        edx,edx
0000004c  je          00000061
0000004e  lea         ecx,[ecx+2]
00000051  mov         eax,esi
00000053  cdq
00000054  idiv        eax,ecx
00000056  test        edx,edx
00000058  setne       al
0000005b  movzx       eax,al
0000005e  pop         esi
0000005f  pop         ebp
00000060  ret
00000061  xor         eax,eax
00000063  pop         esi
00000064  pop         ebp
00000065  ret

IsPrime2 x86

            if (i % 2 == 0) return false;
00000021  mov         eax,esi
00000023  and         eax,80000001h
00000028  jns         0000002F
0000002a  dec         eax
0000002b  or          eax,0FFFFFFFEh
0000002e  inc         eax
0000002f  test        eax,eax
00000031  jne         00000037
00000033  xor         eax,eax
00000035  jmp         0000006D
            if (i % 3 == 0) return false;
00000037  mov         eax,esi
00000039  mov         ecx,3
0000003e  cdq
0000003f  idiv        eax,ecx
00000041  test        edx,edx
00000043  jne         00000049
00000045  xor         eax,eax
00000047  jmp         0000006D
            if (i % 5 == 0) return false;
00000049  mov         eax,esi
0000004b  mov         ecx,5
00000050  cdq
00000051  idiv        eax,ecx
00000053  test        edx,edx
00000055  jne         0000005B
00000057  xor         eax,eax
00000059  jmp         0000006D
            return i % 7 != 0;
0000005b  mov         ecx,7
00000060  mov         eax,esi
00000062  cdq
00000063  idiv        eax,ecx
00000065  test        edx,edx
00000067  setne       al
0000006a  movzx       eax,al
0000006d  and         eax,0FFh
00000072  pop         esi
00000073  pop         ebp
00000074  ret



答案是...对于IsPrime2,本机代码仍然更长。例如,jne 00000037跳到第二个测试,jne 00000049跳到第三个测试,依此类推。对于IsPrime1,每个分支都指向00000061,这基本上是一个return false;

这是x64代码供参考:


IsPrime1 x64

            return i % 2 != 0 && i % 3 != 0 && i % 5 != 0 && i % 7 != 0;
0000001f  mov         eax,r8d
00000022  cdq
00000023  and         eax,1
00000026  xor         eax,edx
00000028  sub         eax,edx
0000002a  test        eax,eax
0000002c  je          000000000000008B
0000002e  mov         eax,55555556h
00000033  imul        r8d
00000036  mov         eax,edx
00000038  shr         eax,1Fh
0000003b  add         edx,eax
0000003d  lea         eax,[rdx+rdx*2]
00000040  mov         ecx,r8d
00000043  sub         ecx,eax
00000045  test        ecx,ecx
00000047  je          000000000000008B
00000049  mov         eax,66666667h
0000004e  imul        r8d
00000051  sar         edx,1
00000053  mov         eax,edx
00000055  shr         eax,1Fh
00000058  add         edx,eax
0000005a  lea         eax,[rdx+rdx*4]
0000005d  mov         ecx,r8d
00000060  sub         ecx,eax
00000062  test        ecx,ecx
00000064  je          000000000000008B
00000066  mov         eax,92492493h
0000006b  imul        r8d
0000006e  add         edx,r8d
00000071  sar         edx,2
00000074  mov         eax,edx
00000076  shr         eax,1Fh
00000079  add         edx,eax
0000007b  imul        edx,edx,7
0000007e  sub         r8d,edx
00000081  xor         eax,eax
00000083  test        r8d,r8d
00000086  setne       al
00000089  jmp         0000000000000092
0000008b  xor         eax,eax
0000008d  jmp         0000000000000092
0000008f  nop
            if (i == 2 || i == 3 || i == 5 || i == 7) return true;
00000090  mov         al,1
00000092  rep ret

IsPrime2 x64

            if (i % 2 == 0) return false;
00000027  mov         eax,r8d
0000002a  cdq
0000002b  and         eax,1
0000002e  xor         eax,edx
00000030  sub         eax,edx
00000032  test        eax,eax
00000034  jne         000000000000003A
00000036  xor         eax,eax
00000038  jmp         00000000000000A2
            if (i % 3 == 0) return false;
0000003a  mov         eax,55555556h
0000003f  imul        r8d
00000042  mov         eax,edx
00000044  shr         eax,1Fh
00000047  add         edx,eax
00000049  lea         eax,[rdx+rdx*2]
0000004c  mov         ecx,r8d
0000004f  sub         ecx,eax
00000051  test        ecx,ecx
00000053  jne         0000000000000059
00000055  xor         al,al
00000057  jmp         00000000000000A2
            if (i % 5 == 0) return false;
00000059  mov         eax,66666667h
0000005e  imul        r8d
00000061  sar         edx,1
00000063  mov         eax,edx
00000065  shr         eax,1Fh
00000068  add         edx,eax
0000006a  lea         eax,[rdx+rdx*4]
0000006d  mov         ecx,r8d
00000070  sub         ecx,eax
00000072  test        ecx,ecx
00000074  jne         000000000000007A
00000076  xor         al,al
00000078  jmp         00000000000000A2
            return i % 7 != 0;
0000007a  mov         eax,92492493h
0000007f  imul        r8d
00000082  add         edx,r8d
00000085  sar         edx,2
00000088  mov         eax,edx
0000008a  shr         eax,1Fh
0000008d  add         edx,eax
0000008f  imul        edx,edx,7
00000092  sub         r8d,edx
00000095  xor         eax,eax
00000097  test        r8d,r8d
0000009a  setne       al
0000009d  jmp         00000000000000A2
0000009f  nop
            if (i == 2 || i == 3 || i == 5 || i == 7) return true;
000000a0  mov         al,1
000000a2  rep ret



同样的结论。 jne 0000000000000059跳到第二个测试,jne 000000000000007A跳到第三个测试,依此类推,而在IsPrime1中,所有分支都指向000000000000008B,这是一个return false;。请注意,尽管这两个版本之间的指令计数差异在x64上较小。

哦,您还应该另外了解branch prediction的工作原理,以及CPU如何估计是否可能采用即将发生的分支。

关于c# - 我认为为什么方法更快,更慢?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/31099291/

10-09 01:34