'if else code blocks ... or one if with multiple OR ( | |) conditions within it ? in C
I was writing a small text matching program in C, which basically checks for the presence of a few characters in some strings (in form of char arrays). Its working now and I have a code block like this:
if (c == 'A') return 1;
else if (c == 'B') return 1;
else if (c == 'C') return 1;
....
....
else if (c == 'Z') return 1;
else return 0;
Is the block above faster? Or would this be faster?
if (c == 'A' || c == 'B' || c == 'C' ||....|| c == 'Z') return 1;
else return 0;
By fast I mean literally fast, i.e. if I run a simple timer from the start of the program till the end which could potentially give a shorter execution time?
Solution 1:[1]
I'd recommend doing the following:
#include <ctype.h>
...
return isupper(c)
Instead of manually checking all of them. The standard C library functions are reasonably fast so performance should be acceptable.
Solution 2:[2]
An optimizing compiler will handle both forms likewise.
Leave such micro-optimizations to the compiler. What also matters is the readability of your source code.
(Of course you need to enable optimizations, for GCC -e.g. the recent GCC 4.8- you'll compile with gcc -O2).
And you really need to benchmark to be sure (because tons of other factors also matter: in particular cache locality) what is the best. You could even use some more fancy algorithms (e.g. testing more frequent letters, like E, before rare ones like Z). Look for search trees for more.
Look for instance into the generated assembly code (use gcc -O2 -fverbose-asm -S to get it) or look into the internal representations, e.g. using the the MELT probe (or passing -fdump-tree-all -giving many dump files to you- to gcc).
With GCC extensions, for your specific example, you could even code case ranges like this:
switch (c) {
case 'A' ... 'Z': return 1;
default: return 0;
}
the above case range assmes that the character encoding is a superset of ASCII. It won't work for EBCDIC
Actually, switch optimization is complex. See this paper etc....
And actually, you want to use isalpha(3) from <ctype.h> (in the C99 standard).
Testing what is a letter is not that simple: Is é or ? a latter for you? For me it is one (they both are vowels and both need more than one byte in UTF8)
Be cautious about the common UTF-8 encoding: some letters (notably from non English languages or alphabet) are encoded with several bytes. Look e.g. at Glib Unicode Manipulation functions.
Solution 3:[3]
If you have a larger number of conditions, a switch block would be better:
switch(c) {
case 'A':
case 'B':
// (...)
case 'Z': return 1;
default: return 0;
}
Solution 4:[4]
Without entering in the scope of compiler optimization, assembly generation and better algorithms, what other posters missed is that the single if with multiple test cases and multiple ORs (||) will short-circuit after the first match. Effectively, the first and the second snippets of code will generate equivalent ASM in terms of speed.
Solution 5:[5]
As other mentioned,makes no different. The same count of cmp instructions will be generated in both ifs conditions(it is not considering any optmization by compiler)
EDIT:
Consider this C functions:
int is_letter2(int c)
{
if(c == 'A') return 1;
else if(c == 'B') return 1;
else if(c == 'C') return 1;
else if(c == 'D') return 1;
else if(c == 'E') return 1;
else if(c == 'F') return 1;
else if(c == 'G') return 1;
else if(c == 'H') return 1;
else if(c == 'I') return 1;
else if(c == 'J') return 1;
else if(c == 'K') return 1;
else if(c == 'L') return 1;
else if(c == 'M') return 1;
else if(c == 'N') return 1;
else if(c == 'O') return 1;
else if(c == 'P') return 1;
else if(c == 'Q') return 1;
else if(c == 'R') return 1;
else if(c == 'S') return 1;
else if(c == 'T') return 1;
else if(c == 'U') return 1;
else if(c == 'V') return 1;
else if(c == 'W') return 1;
else if(c == 'X') return 1;
else if(c == 'Y') return 1;
else if(c == 'Z') return 1;
else return 0;
}
int is_letter(int c)
{
if(c == 'A' ||c == 'B' ||c == 'C' ||c == 'D' ||c == 'E' ||c == 'F' ||c == 'G' ||c == 'H' ||c == 'I' ||c == 'J' ||c == 'K' ||c == 'L' ||c == 'M' ||c == 'N' ||c == 'O' ||c == 'P' ||c == 'Q' ||c == 'R' ||c == 'S' ||c == 'T' ||c == 'U' ||c == 'V' ||c == 'W' ||c == 'X' ||c == 'Y' ||c == 'Z')
return 1;
else return 0;
}
And the assembly output: It's almost the same. Check out:
is_letter2:
.LFB1:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
cmpl $65, 8(%ebp)
jne .L3
movl $1, %eax
jmp .L4
.L3:
cmpl $66, 8(%ebp)
jne .L5
movl $1, %eax
jmp .L4
.L5:
cmpl $67, 8(%ebp)
jne .L6
movl $1, %eax
jmp .L4
.L6:
cmpl $68, 8(%ebp)
jne .L7
movl $1, %eax
jmp .L4
.L7:
cmpl $69, 8(%ebp)
jne .L8
movl $1, %eax
jmp .L4
.L8:
cmpl $70, 8(%ebp)
jne .L9
movl $1, %eax
jmp .L4
.L9:
cmpl $71, 8(%ebp)
jne .L10
movl $1, %eax
jmp .L4
.L10:
cmpl $72, 8(%ebp)
jne .L11
movl $1, %eax
jmp .L4
.L11:
cmpl $73, 8(%ebp)
jne .L12
movl $1, %eax
jmp .L4
.L12:
cmpl $74, 8(%ebp)
jne .L13
movl $1, %eax
jmp .L4
.L13:
cmpl $75, 8(%ebp)
jne .L14
movl $1, %eax
jmp .L4
.L14:
cmpl $76, 8(%ebp)
jne .L15
movl $1, %eax
jmp .L4
.L15:
cmpl $77, 8(%ebp)
jne .L16
movl $1, %eax
jmp .L4
.L16:
cmpl $78, 8(%ebp)
jne .L17
movl $1, %eax
jmp .L4
.L17:
cmpl $79, 8(%ebp)
jne .L18
movl $1, %eax
jmp .L4
.L18:
cmpl $80, 8(%ebp)
jne .L19
movl $1, %eax
jmp .L4
.L19:
cmpl $81, 8(%ebp)
jne .L20
movl $1, %eax
jmp .L4
.L20:
cmpl $82, 8(%ebp)
jne .L21
movl $1, %eax
jmp .L4
.L21:
cmpl $83, 8(%ebp)
jne .L22
movl $1, %eax
jmp .L4
.L22:
cmpl $84, 8(%ebp)
jne .L23
movl $1, %eax
jmp .L4
.L23:
cmpl $85, 8(%ebp)
jne .L24
movl $1, %eax
jmp .L4
.L24:
cmpl $86, 8(%ebp)
jne .L25
movl $1, %eax
jmp .L4
.L25:
cmpl $87, 8(%ebp)
jne .L26
movl $1, %eax
jmp .L4
.L26:
cmpl $88, 8(%ebp)
jne .L27
movl $1, %eax
jmp .L4
.L27:
cmpl $89, 8(%ebp)
jne .L28
movl $1, %eax
jmp .L4
.L28:
cmpl $90, 8(%ebp)
jne .L29
movl $1, %eax
jmp .L4
.L29:
movl $0, %eax
.L4:
popl %ebp
.cfi_def_cfa 4, 4
.cfi_restore 5
ret
.cfi_endproc
.LFE1:
.size is_letter2, .-is_letter2
.globl is_letter
.type is_letter, @function
is_letter:
.LFB2:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
cmpl $65, 8(%ebp)
je .L31
cmpl $66, 8(%ebp)
je .L31
cmpl $67, 8(%ebp)
je .L31
cmpl $68, 8(%ebp)
je .L31
cmpl $69, 8(%ebp)
je .L31
cmpl $70, 8(%ebp)
je .L31
cmpl $71, 8(%ebp)
je .L31
cmpl $72, 8(%ebp)
je .L31
cmpl $73, 8(%ebp)
je .L31
cmpl $74, 8(%ebp)
je .L31
cmpl $75, 8(%ebp)
je .L31
cmpl $76, 8(%ebp)
je .L31
cmpl $77, 8(%ebp)
je .L31
cmpl $78, 8(%ebp)
je .L31
cmpl $79, 8(%ebp)
je .L31
cmpl $80, 8(%ebp)
je .L31
cmpl $81, 8(%ebp)
je .L31
cmpl $82, 8(%ebp)
je .L31
cmpl $83, 8(%ebp)
je .L31
cmpl $84, 8(%ebp)
je .L31
cmpl $85, 8(%ebp)
je .L31
cmpl $86, 8(%ebp)
je .L31
cmpl $87, 8(%ebp)
je .L31
cmpl $88, 8(%ebp)
je .L31
cmpl $89, 8(%ebp)
je .L31
cmpl $90, 8(%ebp)
jne .L32
.L31:
movl $1, %eax
jmp .L33
.L32:
movl $0, %eax
.L33:
popl %ebp
.cfi_def_cfa 4, 4
.cfi_restore 5
ret
.cfi_endproc
In resume,it's the same program.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | mshildt |
| Solution 2 | |
| Solution 3 | Guillaume |
| Solution 4 | Spidey |
| Solution 5 |
