AnTuTu and Intel

Page 7 - Seeking answers? Join the AnandTech community: where nearly half-a-million members share solutions and discuss the latest tech.

Schmide

Diamond Member
Mar 7, 2002
5,588
719
126
I had some free time so I decided to play with icc 13.

First a handy site: http://gcc.godbolt.org/. This allows you to enter code and see the resulting assembly after compilation with gcc, icc, clang and gcc-arm.

Nice tool.

Halving the loop count and unrolling the body by hand (two bit-sets per pass) breaks the optimization.

Code:
/* Sets bits 0..nb-1 of bitmap[] (nb is fixed at 32), two bits per loop pass; only bitmap[0] is written. */
/* NOTE(review): `1 << 31` overflows a 32-bit signed int (undefined behavior in C); `1u <<` would avoid it. */
void r(unsigned *bitmap)
{
  unsigned baddr = 0;  /* index of the next bit to set */
  unsigned nb = 32;  /* total number of bits to set */
  unsigned overun = nb & 1;  /* 1 when nb is odd: one bit is left over after pairing */

  nb>>=1;  /* number of two-bit passes */
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  /* word = baddr / 32, bit = baddr % 32 */
    baddr++;
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  /* second bit of the pair */
    baddr++;
  }
  if(overun)  /* not taken here: 32 is even */
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
}

-O3 -m32

Code:
# Compiler output for the hand-unrolled r(): the loop is kept bit-at-a-time,
# 16 passes with two read-modify-write `orl` stores per pass (no mask fill).
# The trailing `#N.M` notes look like source line.column positions.
L__routine_start__Z1rPj_0:
r(unsigned int*):
        # Preserve %esi/%edi/%ebx; they are restored before `ret`.
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        pushl     %ebx                                          #4.1
        # %eax = baddr (0), %esi = bitmap (the stack argument),
        # %edx = 15, counted down until it reaches -1, i.e. 16 passes.
        xorl      %eax, %eax                                    #
        movl      16(%esp), %ecx                                #3.6
        movl      $15, %edx                                     #
        movl      %ecx, %esi                                    #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First bit of the pair: %ebx = baddr >> 5 (word index), %edi = 1 << %cl.
        # No explicit `& 0x1f` is emitted: a 32-bit x86 shift uses only the low 5 bits of %cl.
        movl      %eax, %ebx                                    #11.21
        movl      %eax, %ecx                                    #11.41
        shrl      $5, %ebx                                      #11.21
        movl      $1, %edi                                      #11.41
        shll      %cl, %edi                                     #11.41
        # %ecx = baddr + 1 for the second bit; step the counter; baddr += 2.
        lea       1(%eax), %ecx                                 #14.5
        decl      %edx                                          #10.10
        addl      $2, %eax                                      #14.5
        orl       %edi, (%esi,%ebx,4)                           #11.5
        # Second bit of the pair, same word-index/mask computation on baddr + 1.
        movl      %ecx, %ebx                                    #13.21
        shrl      $5, %ebx                                      #13.21
        movl      $1, %edi                                      #13.41
        shll      %cl, %edi                                     #13.41
        orl       %edi, (%esi,%ebx,4)                           #13.5
        cmpl      $-1, %edx                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        popl      %ebx                                          #18.1
        popl      %edi                                          #18.1
        popl      %esi                                          #18.1
        ret                                                     #18.1



Edit: This one is funny.

Count halved, followed by two consecutive loops that are identical except for their counters, plus a final `overun` step when the count is odd.

Code:
/* Sets bits 0..nb-1 of bitmap[] (nb is fixed at 32) with two back-to-back loops of nb/2 bits each; only bitmap[0] is written. */
/* NOTE(review): `1 << 31` overflows a 32-bit signed int (undefined behavior in C); `1u <<` would avoid it. */
void r(unsigned *bitmap)
{
  unsigned baddr = 0;  /* index of the next bit to set */
  unsigned nb = 32;  /* total number of bits to set */
  unsigned overun = nb & 1;  /* 1 when nb is odd: one bit is left over after halving */
  nb>>=1;  /* pass count of the first loop */
  unsigned nb2=nb;  /* same pass count, consumed by the second loop */
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  /* word = baddr / 32, bit = baddr % 32 */
    baddr++;
  }
  while (nb2--) {  /* continues from where the first loop stopped */
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  if(overun)  /* not taken here: 32 is even */
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
}

-O3 -m32

Code:
# Compiler output for the two-loop r() with nb = 32: the first loop (..B1.2)
# stays bit-at-a-time; the second loop is replaced by straight-line mask code
# (..L12 / ..L13 / ..L14) that sets a whole run of bits at once.
L__routine_start__Z1rPj_0:
r(unsigned int*):
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        # This third push only reserves a stack slot: %ebp is parked in it below
        # and the slot is discarded by `popl %ecx` at the end.
        pushl     %esi                                          #4.1
        # %ecx = baddr (0), %eax = bitmap, %esi = first-loop counter (15 down to -1),
        # %edi = copy of 15, used later to form the second loop's bit count.
        xorl      %edx, %edx                                    #
        movl      $15, %esi                                     #
        movl      %edx, %ecx                                    #
        movl      16(%esp), %eax                                #3.6
        movl      %esi, %edi                                    #
        movl      %ebp, (%esp)                                  #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First loop: 16 passes, one bit per pass; %ebp holds the 1 << %cl mask.
        movl      %ecx, %edx                                    #11.21
        movl      $1, %ebp                                      #11.41
        shrl      $5, %edx                                      #11.21
        decl      %esi                                          #10.10
        shll      %cl, %ebp                                     #11.41
        incl      %ecx                                          #12.5
        orl       %ebp, (%eax,%edx,4)                           #11.5
        cmpl      $-1, %esi                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        # Second loop rewritten as a range fill. After this setup:
        #   %edx = start bit within its word (baddr & 31)
        #   %eax = address of the word containing the start bit
        #   %esi = bit count (%edi + 1)
        #   %ecx = start bit + count
        # %ebp is restored from its stack slot along the way.
        movl      %ecx, %edx                                    #
        lea       1(%edi), %ecx                                 #14.10
        movl      (%esp), %ebp                                  #
        movl      %edx, %esi                                    #14.10
        andl      $31, %edx                                     #14.10
        shrl      $5, %esi                                      #14.10
        lea       (%eax,%esi,4), %eax                           #14.10
        movl      %ecx, %esi                                    #14.10
        addl      %edx, %ecx                                    #14.10
        # start + count <= 32: the run fits in a single word (..L12).
        # With the constants above (start 16, count 16) this branch is taken.
        cmpl      $32, %ecx                                     #14.10
        jbe       ..L12         # Prob 50%                      #14.10
        # Run crosses a word boundary: OR bits start..31 into the first word,
        # then %esi = bits still to set in the following words.
        movl      %ecx, %esi                                    #14.10
        movl      %edx, %ecx                                    #14.10
        movl      $-1, %edi                                     #14.10
        shll      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
        subl      $32, %esi                                     #14.10
        addl      $4, %eax                                      #14.10
        movl      $-1, %edi                                     #14.10
        cmpl      $32, %esi                                     #14.10
        jbe       ..L13         # Prob 50%                      #14.10
..L14:                                                          #
        # Fully covered words: a plain store of all-ones (same result as OR-ing all-ones).
        movl      %edi, (%eax)                                  #14.10
        addl      $4, %eax                                      #14.10
        subl      $32, %esi                                     #14.10
        cmpl      $32, %esi                                     #14.10
        ja        ..L14         # Prob 50%                      #14.10
..L13:                                                          #
        # Last word: OR in 0xffffffff >> (32 - remaining), i.e. the low `remaining` bits.
        movl      $32, %ecx                                     #14.10
        subl      %esi, %ecx                                    #14.10
        shrl      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
        jmp       ..L15         # Prob 100%                     #14.10
..L12:                                                          #
        # Single-word case: mask = (0xffffffff >> (32 - count)) << start bit.
        movl      $-1, %edi                                     #14.10
        movl      $32, %ecx                                     #14.10
        subl      %esi, %ecx                                    #14.10
        shrl      %cl, %edi                                     #14.10
        movl      %edx, %ecx                                    #14.10
        shll      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
..L15:                                                          #
        # Drop the scratch slot, then restore %edi and %esi.
        popl      %ecx                                          #20.1
        popl      %edi                                          #20.1
        popl      %esi                                          #20.1
        ret                                                     #20.1

The compiler skips the optimization for the first loop but applies it to the second.

Changing nb to an odd number breaks the optimization for the second loop!

Code:
/* Sets bits 0..nb-1 of bitmap[] (nb is fixed at 65): two loops of nb/2 bits each, then one leftover bit; writes bitmap[0..2]. */
/* NOTE(review): `1 << 31` overflows a 32-bit signed int (undefined behavior in C); `1u <<` would avoid it. */
void r(unsigned *bitmap)
{
  unsigned baddr = 0;  /* index of the next bit to set */
  unsigned nb = 65;  /* total number of bits to set (odd) */
  unsigned overun = nb & 1;  /* 1 here: one bit is left over after halving */
  nb>>=1;  /* pass count of the first loop (32) */
  unsigned nb2=nb;  /* same pass count, consumed by the second loop */
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  /* word = baddr / 32, bit = baddr % 32 */
    baddr++;
  }
  while (nb2--) {  /* continues from where the first loop stopped */
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  if(overun)  /* taken here: sets the final bit, index 64 */
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
}

-O3 -m32

Code:
        # Compiler output for the nb = 65 variant (the listing starts at the prologue).
        # Both loops stay bit-at-a-time, 32 passes each, followed by one more
        # single-bit OR for the leftover bit; no mask-fill code is generated.
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        pushl     %esi                                          #4.1
        # %ecx = baddr (0), %edx = bitmap, %eax = 31 counted down to -1 (32 passes).
        xorl      %ecx, %ecx                                    #
        movl      16(%esp), %edx                                #
        movl      $31, %eax                                     #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First loop: word index = baddr >> 5, mask = 1 << %cl, OR into the word.
        movl      %ecx, %esi                                    #11.21
        movl      $1, %edi                                      #11.41
        shrl      $5, %esi                                      #11.21
        decl      %eax                                          #10.10
        shll      %cl, %edi                                     #11.41
        incl      %ecx                                          #12.5
        orl       %edi, (%edx,%esi,4)                           #11.5
        cmpl      $-1, %eax                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        # Second loop: bitmap is reloaded into %edi and the counter reset to 31;
        # %ecx (baddr) carries on from the first loop.
        movl      16(%esp), %edi                                #14.10
        movl      $31, %eax                                     #14.10
..B1.4:                         # Preds ..B1.4 ..B1.3
        movl      %ecx, %edx                                    #15.21
        movl      $1, %esi                                      #15.41
        shrl      $5, %edx                                      #15.21
        decl      %eax                                          #14.10
        shll      %cl, %esi                                     #15.41
        incl      %ecx                                          #16.5
        orl       %esi, (%edi,%edx,4)                           #15.5
        cmpl      $-1, %eax                                     #14.10
        jne       ..B1.4        # Prob 82%                      #14.10
        # Leftover bit: the `if` test itself is gone and the OR is emitted unconditionally.
        movl      %ecx, %eax                                    #19.21
        movl      $1, %edx                                      #19.41
        shrl      $5, %eax                                      #19.21
        shll      %cl, %edx                                     #19.41
        movl      16(%esp), %ecx                                #19.5
        orl       %edx, (%ecx,%eax,4)                           #19.5
        # The third pushed slot is discarded into %ecx; %edi and %esi are restored.
        popl      %ecx                                          #20.1
        popl      %edi                                          #20.1
        popl      %esi                                          #20.1
        ret                                                     #20.1

The optimization is skipped because of the overrun.
 
Last edited:

Idontcare

Elite Member
Oct 10, 1999
21,118
58
91
Thanks guys, got it. So then, this means intel really is cheating again, huh.

Not Intel, just anyone using ICC in a way that results in the creatively optimized code.

Remember gun manufacturers make guns, but that doesn't mean they kill people or are responsible for firearm-related deaths. Same with planes, trains, and automobiles.
 
sale-70-410-exam    | Exam-200-125-pdf    | we-sale-70-410-exam    | hot-sale-70-410-exam    | Latest-exam-700-603-Dumps    | Dumps-98-363-exams-date    | Certs-200-125-date    | Dumps-300-075-exams-date    | hot-sale-book-C8010-726-book    | Hot-Sale-200-310-Exam    | Exam-Description-200-310-dumps?    | hot-sale-book-200-125-book    | Latest-Updated-300-209-Exam    | Dumps-210-260-exams-date    | Download-200-125-Exam-PDF    | Exam-Description-300-101-dumps    | Certs-300-101-date    | Hot-Sale-300-075-Exam    | Latest-exam-200-125-Dumps    | Exam-Description-200-125-dumps    | Latest-Updated-300-075-Exam    | hot-sale-book-210-260-book    | Dumps-200-901-exams-date    | Certs-200-901-date    | Latest-exam-1Z0-062-Dumps    | Hot-Sale-1Z0-062-Exam    | Certs-CSSLP-date    | 100%-Pass-70-383-Exams    | Latest-JN0-360-real-exam-questions    | 100%-Pass-4A0-100-Real-Exam-Questions    | Dumps-300-135-exams-date    | Passed-200-105-Tech-Exams    | Latest-Updated-200-310-Exam    | Download-300-070-Exam-PDF    | Hot-Sale-JN0-360-Exam    | 100%-Pass-JN0-360-Exams    | 100%-Pass-JN0-360-Real-Exam-Questions    | Dumps-JN0-360-exams-date    | Exam-Description-1Z0-876-dumps    | Latest-exam-1Z0-876-Dumps    | Dumps-HPE0-Y53-exams-date    | 2017-Latest-HPE0-Y53-Exam    | 100%-Pass-HPE0-Y53-Real-Exam-Questions    | Pass-4A0-100-Exam    | Latest-4A0-100-Questions    | Dumps-98-365-exams-date    | 2017-Latest-98-365-Exam    | 100%-Pass-VCS-254-Exams    | 2017-Latest-VCS-273-Exam    | Dumps-200-355-exams-date    | 2017-Latest-300-320-Exam    | Pass-300-101-Exam    | 100%-Pass-300-115-Exams    |
http://www.portvapes.co.uk/    | http://www.portvapes.co.uk/    |