-O2 optimizer flag causes basic for loop to fail
I have noticed very strange behavior when adding the -O2 optimization flag while compiling a simple program.
#include <iostream>
// Minimal reproducer: prints the integers 0 through 9, one per line, then returns 0.
int main() {
// Each iteration streams i followed by std::endl (newline plus flush of std::cout).
for (int i = 0; i < 10; i++) {
std::cout << i << std::endl;
}
return 0;
}
program output:
1
2
3
4
5
6
7
...
1228881
...
The program works as expected with the -O3 or -Ofast flags.
output of running clang++ -O2 -v test.cpp:
vm-hw06{mrussell}125: clang++ -O2 -v test.cpp
clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /comp/15/bin
Found candidate GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Selected GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Candidate multilib: .;@m64
Selected multilib: .;@m64
"/h/mrussell/clang/bin/clang-10" -cc1 -triple x86_64-unknown-linux-gnu -emit-obj -disable-free -main-file-name test.cpp -mrelocation-model static -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -v -resource-dir /h/mrussell/clang/lib/clang/10.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0/x86_64-pc-linux-gnu -internal-isystem /h/mrussell
Curiously, removing `<< std::endl` makes the problem go away. Likewise, replacing the std::cout line with a printf call works as expected. Update: removing the `<< i` but keeping the `<< std::endl` also works - 10 blank lines are printed without fail! I am flummoxed here; any help would be greatly appreciated!
PS: Note that I built clang++ following this guide: https://btorpey.github.io/blog/2015/01/02/building-clang/ - and I have been using it to compile basic programs with no such issues for more than a year.
Update:
output of clang++ -O2 -S test.cpp
.text
.file "test.cpp"
# ---------------------------------------------------------------------------
# main (clang -O2 output, AT&T syntax, x86-64 ELF)
# Reviewer annotations (not part of the compiler output) start with "NOTE".
# Register roles, as used below:
#   %r14d - loop counter i (zeroed on entry, passed to operator<<, bumped by 1)
#   %rbx  - value returned by _ZNSolsEi (std::ostream::operator<<(int))
#   %rbp  - pointer loaded from the stream and null-checked; per the block
#           labels this is presumably the std::ctype<char> facet - confirm
# Three pushes plus the return address total 32 bytes of stack at each call.
# ---------------------------------------------------------------------------
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
# %bb.0: # %entry
# NOTE: save the three registers that main overwrites and restores on exit.
pushq %rbp
.cfi_def_cfa_offset 16
pushq %r14
.cfi_def_cfa_offset 24
pushq %rbx
.cfi_def_cfa_offset 32
.cfi_offset %rbx, -32
.cfi_offset %r14, -24
.cfi_offset %rbp, -16
# NOTE: i = 0, then jump straight into the loop body (no entry-time bound check).
xorl %r14d, %r14d
jmp .LBB0_1
.p2align 4, 0x90
.LBB0_4: # %if.end.i
# in Loop: Header=BB0_1 Depth=1
# NOTE: taken when the byte at 56(%rbp) is zero: call _M_widen_init on the
# object in %rbp, then make an indirect call through slot 48 of its first-word
# table with %esi = 10 (ASCII newline); the result comes back in %al.
movq %rbp, %rdi
callq _ZNKSt5ctypeIcE13_M_widen_initEv
movq (%rbp), %rax
movq %rbp, %rdi
movl $10, %esi
callq *48(%rax)
.LBB0_5: # %_ZNKSt5ctypeIcE5widenEc.exit
# in Loop: Header=BB0_1 Depth=1
# NOTE: write the character in %al to the stream in %rbx via put(), then
# flush the stream that put() returned.
movsbl %al, %esi
movq %rbx, %rdi
callq _ZNSo3putEc
movq %rax, %rdi
callq _ZNSo5flushEv
# NOTE: ++i, and leave the loop only when i == -101. The source loop bound is
# 10; counting up from 0 in steps of 1, -101 is not reached until the 32-bit
# counter wraps, which matches the runaway output reported in the question.
# Why the bound became -101 is not visible from this listing.
addl $1, %r14d
cmpl $-101, %r14d
je .LBB0_6
.LBB0_1: # %for.body
# =>This Inner Loop Header: Depth=1
# NOTE: call _ZNSolsEi with %edi = address of _ZSt4cout and %esi = i.
movl $_ZSt4cout, %edi
movl %r14d, %esi
callq _ZNSolsEi
# NOTE: keep the returned pointer in %rbx. Then load the object's first word,
# read the 64-bit value 24 bytes before where it points, and use that value as
# an extra offset when loading the pointer at 240(%rbx,...) into %rbp.
# Presumably a vtable-relative base adjustment - confirm against libstdc++.
movq %rax, %rbx
movq (%rax), %rax
movq -24(%rax), %rax
movq 240(%rbx,%rax), %rbp
# NOTE: a null pointer here goes to the __throw_bad_cast call at .LBB0_7.
testq %rbp, %rbp
je .LBB0_7
# %bb.2: # %_ZSt13__check_facetISt5ctypeIcEERKT_PS3_.exit
# in Loop: Header=BB0_1 Depth=1
# NOTE: byte at 56(%rbp) selects the path: zero -> .LBB0_4 (init + indirect
# call); non-zero -> read the result byte directly from 67(%rbp).
# Presumably a "widen table initialised" flag and its entry for character 10
# - confirm against the libstdc++ headers actually in use.
cmpb $0, 56(%rbp)
je .LBB0_4
# %bb.3: # %if.then.i
# in Loop: Header=BB0_1 Depth=1
movzbl 67(%rbp), %eax
jmp .LBB0_5
.LBB0_6: # %for.cond.cleanup
# NOTE: return 0 after restoring the saved registers in reverse push order.
xorl %eax, %eax
popq %rbx
.cfi_def_cfa_offset 24
popq %r14
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
retq
.LBB0_7: # %if.then.i10
.cfi_def_cfa_offset 32
# NOTE: no instruction follows this call inside main; presumably the callee
# does not return - confirm.
callq _ZSt16__throw_bad_castv
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
# -- End function
# ---------------------------------------------------------------------------
# _GLOBAL__sub_I_test.cpp (clang -O2 output)
# Start-up routine for this translation unit: calls _ZNSt8ios_base4InitC1Ev on
# _ZStL8__ioinit, then tail-calls __cxa_atexit with the matching D1 symbol,
# the object address and __dso_handle as its three arguments.
# Reviewer annotations (not part of the compiler output) start with "NOTE".
# ---------------------------------------------------------------------------
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function _GLOBAL__sub_I_test.cpp
.type _GLOBAL__sub_I_test.cpp,@function
_GLOBAL__sub_I_test.cpp: # @_GLOBAL__sub_I_test.cpp
.cfi_startproc
# %bb.0: # %entry
# NOTE: the pushed %rax value is never read back meaningfully (the later popq
# is followed by a jump out of the function); the push/pop pair only moves
# %rsp by 8 bytes around the call.
pushq %rax
.cfi_def_cfa_offset 16
# NOTE: %edi = address of _ZStL8__ioinit, passed to the Init C1 symbol.
movl $_ZStL8__ioinit, %edi
callq _ZNSt8ios_base4InitC1Ev
# NOTE: arguments for __cxa_atexit: %edi = Init D1 symbol, %esi = object,
# %edx = __dso_handle.
movl $_ZNSt8ios_base4InitD1Ev, %edi
movl $_ZStL8__ioinit, %esi
movl $__dso_handle, %edx
popq %rax
.cfi_def_cfa_offset 8
jmp __cxa_atexit # TAILCALL
.Lfunc_end1:
.size _GLOBAL__sub_I_test.cpp, .Lfunc_end1-_GLOBAL__sub_I_test.cpp
.cfi_endproc
# -- End function
.type _ZStL8__ioinit,@object # @_ZStL8__ioinit
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
.section .init_array,"aw",@init_array
.p2align 3
.quad _GLOBAL__sub_I_test.cpp
.ident "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym _GLOBAL__sub_I_test.cpp
.addrsig_sym _ZStL8__ioinit
.addrsig_sym __dso_handle
.addrsig_sym _ZSt4cout
And, for reference, output of clang++ -S test.cpp
.text
.file "test.cpp"
# ---------------------------------------------------------------------------
# __cxx_global_var_init (clang output without -O2)
# Calls _ZNSt8ios_base4InitC1Ev on _ZStL8__ioinit, then calls __cxa_atexit
# with the matching D1 symbol, the object address and __dso_handle.
# Uses a conventional %rbp frame; no locals are allocated.
# Reviewer annotations (not part of the compiler output) start with "NOTE".
# ---------------------------------------------------------------------------
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function __cxx_global_var_init
.type __cxx_global_var_init,@function
__cxx_global_var_init: # @__cxx_global_var_init
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
# NOTE: %rdi = address of _ZStL8__ioinit, passed to the Init C1 symbol.
movabsq $_ZStL8__ioinit, %rdi
callq _ZNSt8ios_base4InitC1Ev
# NOTE: arguments for __cxa_atexit: %rdi = Init D1 symbol (staged through
# %rax), %rsi = object, %rdx = __dso_handle.
movabsq $_ZNSt8ios_base4InitD1Ev, %rax
movq %rax, %rdi
movabsq $_ZStL8__ioinit, %rsi
movabsq $__dso_handle, %rdx
callq __cxa_atexit
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size __cxx_global_var_init, .Lfunc_end0-__cxx_global_var_init
.cfi_endproc
# -- End function
# ---------------------------------------------------------------------------
# main (clang output without -O2), shown for comparison with the -O2 listing.
# Stack slots, as used below:
#   -8(%rbp) - loop counter i (initialised to 0, compared against 10, bumped by 1)
#   -4(%rbp) - written with 0 once and not read again in this listing;
#              presumably the return-value slot - confirm
# Reviewer annotations (not part of the compiler output) start with "NOTE".
# ---------------------------------------------------------------------------
.text
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
main: # @main
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
# NOTE: reserve 16 bytes of locals, then zero both 4-byte slots.
subq $16, %rsp
movl $0, -4(%rbp)
movl $0, -8(%rbp)
.LBB1_1: # %for.cond
# =>This Inner Loop Header: Depth=1
# NOTE: loop test is i >= 10 -> exit (signed compare), i.e. the bound from
# the source; contrast with the compare against -101 in the -O2 listing.
cmpl $10, -8(%rbp)
jge .LBB1_4
# %bb.2: # %for.body
# in Loop: Header=BB1_1 Depth=1
# NOTE: call _ZNSolsEi with %rdi = address of _ZSt4cout and %esi = i.
movl -8(%rbp), %esi
movabsq $_ZSt4cout, %rdi
callq _ZNSolsEi
# NOTE: pass the returned pointer plus the address of the std::endl
# instantiation to _ZNSolsEPFRSoS_E; endl is called out of line here rather
# than being inlined as in the -O2 listing.
movq %rax, %rdi
movabsq $_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_, %rsi
callq _ZNSolsEPFRSoS_E
# %bb.3: # %for.inc
# in Loop: Header=BB1_1 Depth=1
# NOTE: i = i + 1 through memory, then back to the loop test.
movl -8(%rbp), %eax
addl $1, %eax
movl %eax, -8(%rbp)
jmp .LBB1_1
.LBB1_4: # %for.end
# NOTE: return 0 after releasing the 16 bytes of locals.
xorl %eax, %eax
addq $16, %rsp
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end1:
.size main, .Lfunc_end1-main
.cfi_endproc
# -- End function
# ---------------------------------------------------------------------------
# _GLOBAL__sub_I_test.cpp (clang output without -O2)
# Start-up routine for this translation unit: sets up an %rbp frame and does
# nothing except call __cxx_global_var_init.
# ---------------------------------------------------------------------------
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function _GLOBAL__sub_I_test.cpp
.type _GLOBAL__sub_I_test.cpp,@function
_GLOBAL__sub_I_test.cpp: # @_GLOBAL__sub_I_test.cpp
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
callq __cxx_global_var_init
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end2:
.size _GLOBAL__sub_I_test.cpp, .Lfunc_end2-_GLOBAL__sub_I_test.cpp
.cfi_endproc
# -- End function
.type _ZStL8__ioinit,@object # @_ZStL8__ioinit
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
.section .init_array,"aw",@init_array
.p2align 3
.quad _GLOBAL__sub_I_test.cpp
.ident "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym __cxx_global_var_init
.addrsig_sym __cxa_atexit
.addrsig_sym _ZNSolsEi
.addrsig_sym _ZNSolsEPFRSoS_E
.addrsig_sym _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
.addrsig_sym _GLOBAL__sub_I_test.cpp
.addrsig_sym _ZStL8__ioinit
.addrsig_sym __dso_handle
.addrsig_sym _ZSt4cout
Solution 1:[1]
For anyone arriving at this thread looking for an answer: @AlanBirtles gave me some great tips. The first was that the original build was from trunk rather than from a tagged branch, and thus might be buggy. However, rebuilding clang++ 12, 13, and 14 from source did not help. As he mentioned later, there may instead be a compatibility issue with the libstdc++ in use. After a fresh build of gcc, and a fresh build of clang using that new gcc, all is right with the universe. In some sense this is an unsatisfactory answer, as I'm not sure of the nature of the incompatibility. But if someone stumbles across this thread with the same problem, you know what to do.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | optimus_prime |
