'Why ff sign extension for an unsigned integer?

This C code:

#include <stdio.h>
#define uint64 unsigned long long

unsigned long long main() {
    unsigned int i = 0x50da;
    unsigned int j = 0xc0ffee;
    uint64 k = 0x7ea707a11ed;
    k ^= ~(i & j) | 0x7ab00;
    return k;
}          

compiles to this assembly code:

   0x555555555129 <main+4>:     movl   $0x50da,-0x4(%rbp)
   0x555555555130 <main+11>:    movl   $0xc0ffee,-0x8(%rbp)
   0x555555555137 <main+18>:    movabs $0x7ea707a11ed,%rax
   0x555555555141 <main+28>:    mov    %rax,-0x10(%rbp)
   0x555555555145 <main+32>:    mov    -0x4(%rbp),%eax
   0x555555555148 <main+35>:    and    -0x8(%rbp),%eax
   0x55555555514b <main+38>:    not    %eax
   0x55555555514d <main+40>:    or     $0x7ab00,%eax
   0x555555555152 <main+45>:    mov    %eax,%eax
   0x555555555154 <main+47>:    xor    %rax,-0x10(%rbp)

While stepping through the assembly code in a debugger, after the and for (i&j), I observe 0x50ca in $rax.

But after the not, I observe 0xffffaf35 in $rax, whereas I expected 0xaf35. Why has this result been extended by ffff? It should be either 0x0000af35 or no extension at all.



Solution 1:[1]

In your C implementation, unsigned int has 32 bits. So i & j is 000050CA16. Then ~(i & j) is FFFFAF3516. The compiler has faithfully implemented this with not %eax, which performs a 32-bit bitwise NOT in the RAX/EAX register.

If you want to truncate this to a 16-bit result, you could use (uint16_t) ~(i & j), using the uint16_t type declared in <stdint.h>.

Solution 2:[2]

It's not the asm per se. We can see things with C code if we go step-by-step.

It should be either 0x0000af35 or no extension at all.

No! You're not accounting for the upper zero bits that get flipped to one with the ~ (bitwise complement or "not" operator).

~0 is ~0x00000000 and that goes to: 0xFFFFFFFF

~1 is ~0x00000001 and that goes to: 0xFFFFFFFE


Here is a test program that illustrates the steps:

#include <stdio.h>
#include <stdint.h>

// I hate stdint.h :-(
typedef uint32_t u32;
typedef uint64_t u64;

#define DEF(_wid,_sym,_expr) \
    u##_wid _sym = _expr; \
    def(_wid,_sym,#_sym,_expr,#_expr)

void
showb(int wid,u64 sym)
{
    char buf[64 + 20];
    int rhs;
    int lhs;

    rhs = 0;
    lhs = 0;
    for (;  rhs < wid;  ++rhs, sym >>= 1) {
        buf[lhs++] = (sym & 1) ? '1' : '0';
        if ((rhs % 8) == 7)
            buf[lhs++] = ' ';
    }

    buf[lhs] = 0;

    rhs = lhs - 1;
    for (lhs = 0;  lhs < rhs;  ++lhs, --rhs) {
        char tmp = buf[lhs];
        buf[lhs] = buf[rhs];
        buf[rhs] = tmp;
    }

    printf("%s",buf);
}

void
show(int wid,u64 sym,const char *stxt)
{

    u64 mask = 0;
    for (int bit = 1;  bit <= wid;  ++bit) {
        mask <<= 1;
        mask |= 1;
    }

    //printf("%16.16llX MASK/DEBUG\n",mask);
    sym &= mask;

    int ndig = wid / 4;
    printf(" %*.*llX\n",ndig,ndig,sym);
    showb(wid,sym);
    printf("\n");
}

void
def(int wid,u64 sym,const char *stxt,u64 expr,const char *etxt)
{

    static int sep = 0;
    if (sep)
        printf("\n");
    sep = 1;

    printf("u%d %s = %s\n",wid,stxt,etxt);

    //show(wid,expr,"E");
    show(wid,sym,"");
    if (sym != expr)
        printf("MISMATCH\n");
}

u64
result(void)
{

    DEF(32,i,0x50da);
    DEF(32,j,0xc0ffee);
    DEF(64,k,0x7ea707a11ed);

    // k ^= ~(i & j) | 0x7ab00;
    DEF(32,t1,i & j);
    DEF(32,t2,~t1);
    DEF(32,t3,t2 | 0x7afb00);
    DEF(64,t4,t3);
    DEF(64,t5,k ^ t4);

    return t5;
}

int
main(void)
{

    result();

    return 0;
}

Here is the program output:

u32 i = 0x50da
 000050DA
 00000000 00000000 01010000 11011010

u32 j = 0xc0ffee
 00C0FFEE
 00000000 11000000 11111111 11101110

u64 k = 0x7ea707a11ed
 000007EA707A11ED
 00000000 00000000 00000111 11101010 01110000 01111010 00010001 11101101

u32 t1 = i & j
 000050CA
 00000000 00000000 01010000 11001010

u32 t2 = ~t1
 FFFFAF35
 11111111 11111111 10101111 00110101

u32 t3 = t2 | 0x7afb00
 FFFFFF35
 11111111 11111111 11111111 00110101

u64 t4 = t3
 00000000FFFFFF35
 00000000 00000000 00000000 00000000 11111111 11111111 11111111 00110101

u64 t5 = k ^ t4
 000007EA8F85EED8
 00000000 00000000 00000111 11101010 10001111 10000101 11101110 11011000

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Eric Postpischil
Solution 2