'Why ff sign extension for an unsigned integer?
This C code:
#include <stdio.h>
#define uint64 unsigned long long
unsigned long long main() {
unsigned int i = 0x50da;
unsigned int j = 0xc0ffee;
uint64 k = 0x7ea707a11ed;
k ^= ~(i & j) | 0x7ab00;
return k;
}
compiles to this assembly code:
0x555555555129 <main+4>: movl $0x50da,-0x4(%rbp)
0x555555555130 <main+11>: movl $0xc0ffee,-0x8(%rbp)
0x555555555137 <main+18>: movabs $0x7ea707a11ed,%rax
0x555555555141 <main+28>: mov %rax,-0x10(%rbp)
0x555555555145 <main+32>: mov -0x4(%rbp),%eax
0x555555555148 <main+35>: and -0x8(%rbp),%eax
0x55555555514b <main+38>: not %eax
0x55555555514d <main+40>: or $0x7ab00,%eax
0x555555555152 <main+45>: mov %eax,%eax
0x555555555154 <main+47>: xor %rax,-0x10(%rbp)
While stepping through the assembly code in a debugger, after the and for (i&j), I observe 0x50ca in $rax.
But after the not, I observe 0xffffaf35 in $rax, whereas I expected 0xaf35. Why has this result been extended by ffff? It should be either 0x0000af35 or no extension at all.
Solution 1:[1]
In your C implementation, unsigned int has 32 bits. So i & j is 000050CA16. Then ~(i & j) is FFFFAF3516. The compiler has faithfully implemented this with not %eax, which performs a 32-bit bitwise NOT in the RAX/EAX register.
If you want to truncate this to a 16-bit result, you could use (uint16_t) ~(i & j), using the uint16_t type declared in <stdint.h>.
Solution 2:[2]
It's not the asm per se. We can see things with C code if we go step-by-step.
It should be either 0x0000af35 or no extension at all.
No! You're not accounting for the upper zero bits that get flipped to one with the ~ (bitwise complement or "not" operator).
~0 is ~0x00000000 and that goes to: 0xFFFFFFFF
~1 is ~0x00000001 and that goes to: 0xFFFFFFFE
Here is a test program that illustrates the steps:
#include <stdio.h>
#include <stdint.h>
// I hate stdint.h :-(
typedef uint32_t u32;
typedef uint64_t u64;
#define DEF(_wid,_sym,_expr) \
u##_wid _sym = _expr; \
def(_wid,_sym,#_sym,_expr,#_expr)
void
showb(int wid,u64 sym)
{
char buf[64 + 20];
int rhs;
int lhs;
rhs = 0;
lhs = 0;
for (; rhs < wid; ++rhs, sym >>= 1) {
buf[lhs++] = (sym & 1) ? '1' : '0';
if ((rhs % 8) == 7)
buf[lhs++] = ' ';
}
buf[lhs] = 0;
rhs = lhs - 1;
for (lhs = 0; lhs < rhs; ++lhs, --rhs) {
char tmp = buf[lhs];
buf[lhs] = buf[rhs];
buf[rhs] = tmp;
}
printf("%s",buf);
}
void
show(int wid,u64 sym,const char *stxt)
{
u64 mask = 0;
for (int bit = 1; bit <= wid; ++bit) {
mask <<= 1;
mask |= 1;
}
//printf("%16.16llX MASK/DEBUG\n",mask);
sym &= mask;
int ndig = wid / 4;
printf(" %*.*llX\n",ndig,ndig,sym);
showb(wid,sym);
printf("\n");
}
void
def(int wid,u64 sym,const char *stxt,u64 expr,const char *etxt)
{
static int sep = 0;
if (sep)
printf("\n");
sep = 1;
printf("u%d %s = %s\n",wid,stxt,etxt);
//show(wid,expr,"E");
show(wid,sym,"");
if (sym != expr)
printf("MISMATCH\n");
}
u64
result(void)
{
DEF(32,i,0x50da);
DEF(32,j,0xc0ffee);
DEF(64,k,0x7ea707a11ed);
// k ^= ~(i & j) | 0x7ab00;
DEF(32,t1,i & j);
DEF(32,t2,~t1);
DEF(32,t3,t2 | 0x7afb00);
DEF(64,t4,t3);
DEF(64,t5,k ^ t4);
return t5;
}
int
main(void)
{
result();
return 0;
}
Here is the program output:
u32 i = 0x50da
000050DA
00000000 00000000 01010000 11011010
u32 j = 0xc0ffee
00C0FFEE
00000000 11000000 11111111 11101110
u64 k = 0x7ea707a11ed
000007EA707A11ED
00000000 00000000 00000111 11101010 01110000 01111010 00010001 11101101
u32 t1 = i & j
000050CA
00000000 00000000 01010000 11001010
u32 t2 = ~t1
FFFFAF35
11111111 11111111 10101111 00110101
u32 t3 = t2 | 0x7afb00
FFFFFF35
11111111 11111111 11111111 00110101
u64 t4 = t3
00000000FFFFFF35
00000000 00000000 00000000 00000000 11111111 11111111 11111111 00110101
u64 t5 = k ^ t4
000007EA8F85EED8
00000000 00000000 00000111 11101010 10001111 10000101 11101110 11011000
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Eric Postpischil |
| Solution 2 |
