'if else code blocks ... or one if with multiple OR ( | |) conditions within it ? in C

I was writing a small text matching program in C, which basically checks for the presence of a few characters in some strings (in form of char arrays). Its working now and I have a code block like this:

if (c == 'A') return 1;
else if (c == 'B') return 1;
else if (c == 'C') return 1;
....
....
else if (c == 'Z') return 1;
else return 0;

Is the block above faster? Or would this be faster?

if (c == 'A' || c == 'B' || c == 'C' ||....|| c == 'Z') return 1;
else return 0;

By fast I mean literally fast, i.e. if I run a simple timer from the start of the program till the end which could potentially give a shorter execution time?



Solution 1:[1]

I'd recommend doing the following:

#include <ctype.h>

...

return isupper(c)

Instead of manually checking all of them. The standard C library functions are reasonably fast so performance should be acceptable.

Solution 2:[2]

An optimizing compiler will handle both forms likewise.

Leave such micro-optimizations to the compiler. What also matters is the readability of your source code.

(Of course you need to enable optimizations, for GCC -e.g. the recent GCC 4.8- you'll compile with gcc -O2).

And you really need to benchmark to be sure (because tons of other factors also matter: in particular cache locality) what is the best. You could even use some more fancy algorithms (e.g. testing more frequent letters, like E, before rare ones like Z). Look for search trees for more.

Look for instance into the generated assembly code (use gcc -O2 -fverbose-asm -S to get it) or look into the internal representations, e.g. using the the MELT probe (or passing -fdump-tree-all -giving many dump files to you- to gcc).

With GCC extensions, for your specific example, you could even code case ranges like this:

 switch (c) {
    case 'A' ... 'Z': return 1;
    default: return 0;
 }

the above case range assmes that the character encoding is a superset of ASCII. It won't work for EBCDIC

Actually, switch optimization is complex. See this paper etc....

And actually, you want to use isalpha(3) from <ctype.h> (in the C99 standard).

Testing what is a letter is not that simple: Is é or ? a latter for you? For me it is one (they both are vowels and both need more than one byte in UTF8)

Be cautious about the common UTF-8 encoding: some letters (notably from non English languages or alphabet) are encoded with several bytes. Look e.g. at Glib Unicode Manipulation functions.

Solution 3:[3]

If you have a larger number of conditions, a switch block would be better:

switch(c) {
   case 'A':
   case 'B':
   // (...)
   case 'Z': return 1;
   default: return 0;
}

Solution 4:[4]

Without entering in the scope of compiler optimization, assembly generation and better algorithms, what other posters missed is that the single if with multiple test cases and multiple ORs (||) will short-circuit after the first match. Effectively, the first and the second snippets of code will generate equivalent ASM in terms of speed.

Solution 5:[5]

As other mentioned,makes no different. The same count of cmp instructions will be generated in both ifs conditions(it is not considering any optmization by compiler)

EDIT:

Consider this C functions:

int is_letter2(int c)
{
  if(c == 'A') return 1;
  else if(c == 'B') return 1;
  else if(c == 'C') return 1;
  else if(c == 'D') return 1;
  else if(c == 'E') return 1;
  else if(c == 'F') return 1;
  else if(c == 'G') return 1;
  else if(c == 'H') return 1;
  else if(c == 'I') return 1;
  else if(c == 'J') return 1;
  else if(c == 'K') return 1;
  else if(c == 'L') return 1;
  else if(c == 'M') return 1;
  else if(c == 'N') return 1;
  else if(c == 'O') return 1;
  else if(c == 'P') return 1;
  else if(c == 'Q') return 1;
  else if(c == 'R') return 1;
  else if(c == 'S') return 1;
  else if(c == 'T') return 1;
  else if(c == 'U') return 1;
  else if(c == 'V') return 1;
  else if(c == 'W') return 1;
  else if(c == 'X') return 1;
  else if(c == 'Y') return 1;
  else if(c == 'Z') return 1;
  else return 0;
}

int is_letter(int c)
{
  if(c == 'A' ||c == 'B' ||c == 'C' ||c == 'D' ||c == 'E' ||c == 'F' ||c == 'G' ||c == 'H' ||c == 'I' ||c == 'J' ||c == 'K' ||c == 'L' ||c == 'M' ||c == 'N' ||c == 'O' ||c == 'P' ||c == 'Q' ||c == 'R' ||c == 'S' ||c == 'T' ||c == 'U' ||c == 'V' ||c == 'W' ||c == 'X' ||c == 'Y' ||c == 'Z')
    return 1;
 else return 0;
}

And the assembly output: It's almost the same. Check out:

is_letter2:
.LFB1:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    cmpl    $65, 8(%ebp)
    jne .L3
    movl    $1, %eax
    jmp .L4
.L3:
    cmpl    $66, 8(%ebp)
    jne .L5
    movl    $1, %eax
    jmp .L4
.L5:
    cmpl    $67, 8(%ebp)
    jne .L6
    movl    $1, %eax
    jmp .L4
.L6:
    cmpl    $68, 8(%ebp)
    jne .L7
    movl    $1, %eax
    jmp .L4
.L7:
    cmpl    $69, 8(%ebp)
    jne .L8
    movl    $1, %eax
    jmp .L4
.L8:
    cmpl    $70, 8(%ebp)
    jne .L9
    movl    $1, %eax
    jmp .L4
.L9:
    cmpl    $71, 8(%ebp)
    jne .L10
    movl    $1, %eax
    jmp .L4
.L10:
    cmpl    $72, 8(%ebp)
    jne .L11
    movl    $1, %eax
    jmp .L4
.L11:
    cmpl    $73, 8(%ebp)
    jne .L12
    movl    $1, %eax
    jmp .L4
.L12:
    cmpl    $74, 8(%ebp)
    jne .L13
    movl    $1, %eax
    jmp .L4
.L13:
    cmpl    $75, 8(%ebp)
    jne .L14
    movl    $1, %eax
    jmp .L4
.L14:
    cmpl    $76, 8(%ebp)
    jne .L15
    movl    $1, %eax
    jmp .L4
.L15:
    cmpl    $77, 8(%ebp)
    jne .L16
    movl    $1, %eax
    jmp .L4
.L16:
    cmpl    $78, 8(%ebp)
    jne .L17
    movl    $1, %eax
    jmp .L4
.L17:
    cmpl    $79, 8(%ebp)
    jne .L18
    movl    $1, %eax
    jmp .L4
.L18:
    cmpl    $80, 8(%ebp)
    jne .L19
    movl    $1, %eax
    jmp .L4
.L19:
    cmpl    $81, 8(%ebp)
    jne .L20
    movl    $1, %eax
    jmp .L4
.L20:
    cmpl    $82, 8(%ebp)
    jne .L21
    movl    $1, %eax
    jmp .L4
.L21:
    cmpl    $83, 8(%ebp)
    jne .L22
    movl    $1, %eax
    jmp .L4
.L22:
    cmpl    $84, 8(%ebp)
    jne .L23
    movl    $1, %eax
    jmp .L4
.L23:
    cmpl    $85, 8(%ebp)
    jne .L24
    movl    $1, %eax
    jmp .L4
.L24:
    cmpl    $86, 8(%ebp)
    jne .L25
    movl    $1, %eax
    jmp .L4
.L25:
    cmpl    $87, 8(%ebp)
    jne .L26
    movl    $1, %eax
    jmp .L4
.L26:
    cmpl    $88, 8(%ebp)
    jne .L27
    movl    $1, %eax
    jmp .L4
.L27:
    cmpl    $89, 8(%ebp)
    jne .L28
    movl    $1, %eax
    jmp .L4
.L28:
    cmpl    $90, 8(%ebp)
    jne .L29
    movl    $1, %eax
    jmp .L4
.L29:
    movl    $0, %eax
.L4:
    popl    %ebp
    .cfi_def_cfa 4, 4
    .cfi_restore 5
    ret
    .cfi_endproc
.LFE1:
    .size   is_letter2, .-is_letter2
    .globl  is_letter
    .type   is_letter, @function
is_letter:
.LFB2:
    .cfi_startproc
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    cmpl    $65, 8(%ebp)
    je  .L31
    cmpl    $66, 8(%ebp)
    je  .L31
    cmpl    $67, 8(%ebp)
    je  .L31
    cmpl    $68, 8(%ebp)
    je  .L31
    cmpl    $69, 8(%ebp)
    je  .L31
    cmpl    $70, 8(%ebp)
    je  .L31
    cmpl    $71, 8(%ebp)
    je  .L31
    cmpl    $72, 8(%ebp)
    je  .L31
    cmpl    $73, 8(%ebp)
    je  .L31
    cmpl    $74, 8(%ebp)
    je  .L31
    cmpl    $75, 8(%ebp)
    je  .L31
    cmpl    $76, 8(%ebp)
    je  .L31
    cmpl    $77, 8(%ebp)
    je  .L31
    cmpl    $78, 8(%ebp)
    je  .L31
    cmpl    $79, 8(%ebp)
    je  .L31
    cmpl    $80, 8(%ebp)
    je  .L31
    cmpl    $81, 8(%ebp)
    je  .L31
    cmpl    $82, 8(%ebp)
    je  .L31
    cmpl    $83, 8(%ebp)
    je  .L31
    cmpl    $84, 8(%ebp)
    je  .L31
    cmpl    $85, 8(%ebp)
    je  .L31
    cmpl    $86, 8(%ebp)
    je  .L31
    cmpl    $87, 8(%ebp)
    je  .L31
    cmpl    $88, 8(%ebp)
    je  .L31
    cmpl    $89, 8(%ebp)
    je  .L31
    cmpl    $90, 8(%ebp)
    jne .L32
.L31:
    movl    $1, %eax
    jmp .L33
.L32:
    movl    $0, %eax
.L33:
    popl    %ebp
    .cfi_def_cfa 4, 4
    .cfi_restore 5
    ret
    .cfi_endproc

In resume,it's the same program.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 mshildt
Solution 2
Solution 3 Guillaume
Solution 4 Spidey
Solution 5