CPU Instructions per Second:
- MIPS = Million instructions per second
- GIPS = Billion instructions per second
Synthetic benchmarks such as Dhrystone (as used by SIsoftware) are now generally used to estimate computer performance in commonly used applications. Below are Dhrystone results for current CPUs:
Here are some rough rules of thumb:
Let us assume conservative estimate of 5 GIPS/Ghz. Hence a one 3 GHz core can execute roughly 15 billion machine instructructions per second. Yet this is machine instructions, yet how could this translate to a high level language like C?
This is very tough to estimate, but lets assume a that for every line of C we have 10 machine instructions, then we a we can have 500 C MIPS/GHz. Hence a 2 GHz CPU can execute roughly 1 billion C instructions per second. Modern CPUs are very, very fast when application is CPU bound.
If we assume a C application transaction takes 10ms to process (and it is CPU bound) on a 2 GHz CPU then is is executing 2 (GHz) x 500 (C MIPS) x 0.01 = 10 million lines of C code. If we have requirement of 200 transactions per second and our CPU is running at 2 GHz then we need the following number of CPU cores:
200 (transactions per second) / (1 (second) / 0.01 (10ms) ) = 2 CPU cores
I used the below Sieve of Eratosthenes prime number C code to estimate C to machine instruction ratio:
https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes
Calculate primes up to 1,000,000:
#include <stdio.h>
int main()
{
int number,i,j;
number=1000000;
int primes[number+1];
//populating array with naturals numbers
for(i = 2; i<=number; i++)
primes[i] = i;
i = 2;
while ((i*i) <= number)
{
if (primes[i] != 0)
{
for(j=2; j<number; j++)
{
if (primes[i]*j > number)
break;
else
// Instead of deleteing , making elemnets 0
primes[primes[i]*j]=0;
}
}
i++;
}
for(i = 2; i<=number; i++)
{
//If number is not 0 then it is prime
if (primes[i]!=0)
printf("%d\n",primes[i]);
}
return 0;
}
$ gcc -O2 -S -c sieve.c # to produce assembler code
$ cat sieve.s
.file "sieve.c"
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "%d\n"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB11:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movl $2, %eax
movq %rsp, %rbp
.cfi_def_cfa_register 6
pushq %r12
pushq %rbx
subq $4000032, %rsp
leaq 15(%rsp), %r12
.cfi_offset 3, -32
.cfi_offset 12, -24
andq $-16, %r12
movq %r12, %rdx
.p2align 4,,10
.p2align 3
.L2:
movl %eax, 8(%rdx)
addl $1, %eax
addq $4, %rdx
cmpl $1000001, %eax
jne .L2
movl $2, %ebx
.p2align 4,,10
.p2align 3
.L5:
movslq %ebx, %rcx
movl (%r12,%rcx,4), %edx
testl %edx, %edx
je .L3
movl $2, %eax
jmp .L4
.p2align 4,,10
.p2align 3
.L13:
addl $1, %eax
movslq %edx, %rdx
cmpl $1000000, %eax
movl $0, (%r12,%rdx,4)
je .L3
movl (%r12,%rcx,4), %edx
.L4:
imull %eax, %edx
cmpl $1000000, %edx
jle .L13
.L3:
addl $1, %ebx
movl %ebx, %eax
imull %ebx, %eax
cmpl $1000000, %eax
jle .L5
xorl %ebx, %ebx
jmp .L7
.p2align 4,,10
.p2align 3
.L6:
addq $4, %rbx
cmpq $3999996, %rbx
je .L14
.L7:
movl 8(%r12,%rbx), %esi
testl %esi, %esi
je .L6
xorl %eax, %eax
movl $.LC0, %edi
addq $4, %rbx
call printf
cmpq $3999996, %rbx
jne .L7
.L14:
leaq -16(%rbp), %rsp
xorl %eax, %eax
popq %rbx
popq %r12
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE11:
.size main, .-main
.ident "GCC: (GNU) 4.4.7 20120313 (Red Hat 4.4.7-18)"
.section .note.GNU-stack,"",@progbits
$ wc -l sieve.c sieve.s
39 sieve.c # number of line of c code
93 sieve.s # number of assembler instructions
$ gcc -c -g -Wa,-a,-ad s.c > sieve.lst
$ cat sieve.lst
GAS LISTING /tmp/cccMtKmE.s page 1
1 .file "s.c"
9 .Ltext0:
10 .section .rodata
11 .LC0:
12 0000 25640A00 .string "%d\n"
13 .text
14 .globl main
16 main:
17 .LFB0:
18 .file 1 "s.c"
0:s.c **** #include <stdio.h>
1:s.c ****
2:s.c **** int main()
3:s.c **** {
19 .loc 1 4 0
20 .cfi_startproc
21 0000 55 pushq %rbp
22 .cfi_def_cfa_offset 16
23 .cfi_offset 6, -16
24 0001 4889E5 movq %rsp, %rbp
25 .cfi_def_cfa_register 6
26 0004 53 pushq %rbx
27 0005 4883EC28 subq $40, %rsp
28 .loc 1 4 0
29 0009 4889E0 movq %rsp, %rax
30 000c 4889C3 movq %rax, %rbx
31 .cfi_offset 3, -24
4:s.c **** int number,i,j;
5:s.c **** number=1000000;
32 .loc 1 6 0
33 000f C745E440 movl $1000000, -28(%rbp)
33 420F00
6:s.c ****
7:s.c **** int primes[number+1];
34 .loc 1 8 0
35 0016 8B45E4 movl -28(%rbp), %eax
36 0019 83C001 addl $1, %eax
37 001c 4863D0 movslq %eax, %rdx
38 001f 4883EA01 subq $1, %rdx
39 0023 488955D0 movq %rdx, -48(%rbp)
40 0027 4898 cltq
41 0029 48C1E002 salq $2, %rax
42 002d 4883C00F addq $15, %rax
43 0031 4883C00F addq $15, %rax
44 0035 48C1E804 shrq $4, %rax
45 0039 48C1E004 salq $4, %rax
46 003d 4829C4 subq %rax, %rsp
47 0040 4889E0 movq %rsp, %rax
48 0043 4883C00F addq $15, %rax
49 0047 48C1E804 shrq $4, %rax
50 004b 48C1E004 salq $4, %rax
51 004f 488945D8 movq %rax, -40(%rbp)
8:s.c ****
9:s.c **** //populating array with naturals numbers
10:s.c **** for(i = 2; i<=number; i++)
52 .loc 1 11 0
53 0053 C745E802 movl $2, -24(%rbp)
GAS LISTING /tmp/cccMtKmE.s page 2
53 000000
54 005a EB14 jmp .L2
55 .L3:
11:s.c **** primes[i] = i;
56 .loc 1 12 0
57 005c 8B55E8 movl -24(%rbp), %edx
58 005f 488B45D8 movq -40(%rbp), %rax
59 0063 4863D2 movslq %edx, %rdx
60 0066 8B4DE8 movl -24(%rbp), %ecx
61 0069 890C90 movl %ecx, (%rax,%rdx,4)
11:s.c **** primes[i] = i;
62 .loc 1 11 0
63 006c 8345E801 addl $1, -24(%rbp)
64 .L2:
65 0070 8B45E8 movl -24(%rbp), %eax
66 0073 3B45E4 cmpl -28(%rbp), %eax
67 0076 7EE4 jle .L3
12:s.c ****
13:s.c **** i = 2;
68 .loc 1 14 0
69 0078 C745E802 movl $2, -24(%rbp)
69 000000
14:s.c **** while ((i*i) <= number)
70 .loc 1 15 0
71 007f EB64 jmp .L4
72 .L9:
15:s.c **** {
16:s.c **** if (primes[i] != 0)
73 .loc 1 17 0
74 0081 8B55E8 movl -24(%rbp), %edx
75 0084 488B45D8 movq -40(%rbp), %rax
76 0088 4863D2 movslq %edx, %rdx
77 008b 8B0490 movl (%rax,%rdx,4), %eax
78 008e 85C0 testl %eax, %eax
79 0090 744F je .L5
17:s.c **** {
18:s.c **** for(j=2; j<number; j++)
80 .loc 1 19 0
81 0092 C745EC02 movl $2, -20(%rbp)
81 000000
82 0099 EB3B jmp .L6
83 .L8:
19:s.c **** {
20:s.c **** if (primes[i]*j > number)
84 .loc 1 21 0
85 009b 8B55E8 movl -24(%rbp), %edx
86 009e 488B45D8 movq -40(%rbp), %rax
87 00a2 4863D2 movslq %edx, %rdx
88 00a5 8B0490 movl (%rax,%rdx,4), %eax
89 00a8 0FAF45EC imull -20(%rbp), %eax
90 00ac 3B45E4 cmpl -28(%rbp), %eax
91 00af 7F2F jg .L14
92 .L7:
21:s.c **** break;
22:s.c **** else
23:s.c **** // Instead of deleteing , making elemnets 0
24:s.c **** primes[primes[i]*j]=0;
GAS LISTING /tmp/cccMtKmE.s page 3
93 .loc 1 25 0
94 00b1 8B55E8 movl -24(%rbp), %edx
95 00b4 488B45D8 movq -40(%rbp), %rax
96 00b8 4863D2 movslq %edx, %rdx
97 00bb 8B0490 movl (%rax,%rdx,4), %eax
98 00be 89C2 movl %eax, %edx
99 00c0 0FAF55EC imull -20(%rbp), %edx
100 00c4 488B45D8 movq -40(%rbp), %rax
101 00c8 4863D2 movslq %edx, %rdx
102 00cb C7049000 movl $0, (%rax,%rdx,4)
102 000000
19:s.c **** {
103 .loc 1 19 0
104 00d2 8345EC01 addl $1, -20(%rbp)
105 .L6:
106 00d6 8B45EC movl -20(%rbp), %eax
107 00d9 3B45E4 cmpl -28(%rbp), %eax
108 00dc 7CBD jl .L8
109 00de EB01 jmp .L5
110 .L14:
22:s.c **** break;
111 .loc 1 22 0
112 00e0 90 nop
113 .L5:
25:s.c **** }
26:s.c **** }
27:s.c **** i++;
114 .loc 1 28 0
115 00e1 8345E801 addl $1, -24(%rbp)
116 .L4:
15:s.c **** while ((i*i) <= number)
117 .loc 1 15 0
118 00e5 8B45E8 movl -24(%rbp), %eax
119 00e8 0FAF45E8 imull -24(%rbp), %eax
120 00ec 3B45E4 cmpl -28(%rbp), %eax
121 00ef 7E90 jle .L9
28:s.c **** }
29:s.c ****
30:s.c **** for(i = 2; i<=number; i++)
122 .loc 1 31 0
123 00f1 C745E802 movl $2, -24(%rbp)
123 000000
124 00f8 EB36 jmp .L10
125 .L12:
31:s.c **** {
32:s.c **** //If number is not 0 then it is prime
33:s.c **** if (primes[i]!=0)
126 .loc 1 34 0
127 00fa 8B55E8 movl -24(%rbp), %edx
128 00fd 488B45D8 movq -40(%rbp), %rax
129 0101 4863D2 movslq %edx, %rdx
130 0104 8B0490 movl (%rax,%rdx,4), %eax
131 0107 85C0 testl %eax, %eax
132 0109 7421 je .L11
34:s.c **** printf("%d\n",primes[i]);
133 .loc 1 35 0
134 010b 8B55E8 movl -24(%rbp), %edx
GAS LISTING /tmp/cccMtKmE.s page 4
135 010e 488B45D8 movq -40(%rbp), %rax
136 0112 4863D2 movslq %edx, %rdx
137 0115 8B1490 movl (%rax,%rdx,4), %edx
138 0118 B8000000 movl $.LC0, %eax
138 00
139 011d 89D6 movl %edx, %esi
140 011f 4889C7 movq %rax, %rdi
141 0122 B8000000 movl $0, %eax
141 00
142 0127 E8000000 call printf
142 00
143 .L11:
31:s.c **** {
144 .loc 1 31 0
145 012c 8345E801 addl $1, -24(%rbp)
146 .L10:
147 0130 8B45E8 movl -24(%rbp), %eax
148 0133 3B45E4 cmpl -28(%rbp), %eax
149 0136 7EC2 jle .L12
35:s.c **** }
36:s.c ****
37:s.c **** return 0;
150 .loc 1 38 0
151 0138 B8000000 movl $0, %eax
151 00
152 013d 4889DC movq %rbx, %rsp
38:s.c **** }
153 .loc 1 39 0
154 0140 488B5DF8 movq -8(%rbp), %rbx
155 0144 C9 leave
156 .cfi_def_cfa 7, 8
157 0145 C3 ret
158 .cfi_endproc
159 .LFE0:
161 .Letext0:
GAS LISTING /tmp/cccMtKmE.s page 5
DEFINED SYMBOLS
*ABS*:0000000000000000 s.c
/tmp/cccMtKmE.s:16 .text:0000000000000000 main
UNDEFINED SYMBOLS
printf
$ sudo yum install -y perf
$ perf stat ./sieve > /dev/null
Performance counter stats for './sieve':
83.077618 task-clock (msec) # 0.991 CPUs utilized
4 context-switches # 0.048 K/sec
0 cpu-migrations # 0.000 K/sec
592 page-faults # 0.007 M/sec
120,181,719 cycles # 1.447 GHz (49.56%)
0 stalled-cycles-frontend (49.65%)
0 stalled-cycles-backend # 0.00% backend cycles idle (51.58%)
0 instructions # 0.00 insn per cycle (51.60%)
0 branches # 0.000 K/sec (50.88%)
0 branch-misses # 0.00% of all branches (50.20%)
0.083848321 seconds time elapsed
$ time ./s > /dev/null
real 0m0.086s
user 0m0.080s
sys 0m0.006s
BogoMIPS - Bogus unscientific MIPS:
https://en.wikipedia.org/wiki/BogoMips
In my Linux VM I had:
Total of 12 processors activated (55199.95 BogoMIPS)
As each vCPU is one thread we have 12 vCPUs = 6 cores. 6 cores x 5 GIPS/GHz x 2.3 GHz = 69 GIPS. It is not a bad ballpark estimate.
How do other languages compare in performance:
https://benchmarksgame-team.pages.debian.net/benchmarksgame/
http://attractivechaos.github.io/plb/
No comments:
Post a Comment