Branching and Calling Functions

We can see all of the relevant combinations of instructions with the following block of code:

  if ( ia ) { printf("x\n"); } else { printf("y\n"); }
  if ( ca ) { printf("x\n"); } else { printf("y\n"); }
  if ( la ) { printf("x\n"); } else { printf("y\n"); }
  if ( ca == 0 ) { printf("x\n"); } else { printf("y\n"); }
  if ( uca == 0 ) { printf("x\n"); } else { printf("y\n"); }

  if ( ia == ib ) { printf("x\n"); } else { printf("y\n"); }
  if ( ca == cb ) { printf("x\n"); } else { printf("y\n"); }
  if ( uca == ucb ) { printf("x\n"); } else { printf("y\n"); }
  if ( la == lb ) { printf("x\n"); } else { printf("y\n"); }

  if ( ia == 2 ) { printf("x\n"); } else { printf("y\n"); }
  if ( ca == 2 ) { printf("x\n"); } else { printf("y\n"); }
  if ( uca == 2 ) { printf("x\n"); } else { printf("y\n"); }
  if ( la == 2 ) { printf("x\n"); } else { printf("y\n"); }

  if ( 2 == ia ) { printf("x\n"); } else { printf("y\n"); }
  if ( 2 == ca ) { printf("x\n"); } else { printf("y\n"); }
  if ( 2 == uca ) { printf("x\n"); } else { printf("y\n"); }
  if ( 2 == la ) { printf("x\n"); } else { printf("y\n"); }

  if ( 0 == ia ) { printf("x\n"); } else { printf("y\n"); }
  if ( 0 == ca ) { printf("x\n"); } else { printf("y\n"); }
  if ( 0 == uca ) { printf("x\n"); } else { printf("y\n"); }
  if ( 0 == la ) { printf("x\n"); } else { printf("y\n"); }

For brevity, in the assembly we will only show the printf calls and else clause for the first case.

AMD64

The equivalent assembly is shown in the following tables, organized by related statements:

C	.s file	gdb
`if ( ia ) {`
	`cmpl $0, -8(%rbp)`	`cmpl $0x0,-0x8(%rbp)`
	`je LBB0_2`	`je 0x100000a1d`
`printf("x\n");`
	`leaq L_.str(%rip), %rdi`	`lea 0x58e(%rip),%rdi`
	`movb $0, %al`	`mov $0x0,%al`
	`callq _printf`	`call 0x100000f9c`
`}`
	`jmp LBB0_3`	`jmp 0x100000a2b`
`else {`
	`LBB0_2:`
`printf("y\n");`
	`leaq L_.str.1(%rip), %rdi`	`lea 0x581(%rip),%rdi`
	`movb $0, %al`	`mov $0x0,%al`
	`callq _printf`	`call 0x100000f9c`
`}`
	`LBB0_3:`
`if ( ca ) {`
	`cmpb $0, -29(%rbp)`	`cmpb $0x0,-0x1d(%rbp)`
	`je LBB0_8`	`je 0x100000a65`
`if ( la ) {`
	`cmpq $0, -48(%rbp)`	`cmpq $0x0,-0x30(%rbp)`
	`je LBB0_14`	`je 0x100000aae`
`if ( ca == 0 ) {`
	`movsbl -29(%rbp), %eax`	`movsbl -0x1d(%rbp),%eax`
	`cmpl $0, %eax`	`cmp $0x0,%eax`
	`jne LBB0_62`	`jne 0x100000d12`
`if ( uca == 0 ) {`
	`movzbl -32(%rbp), %eax`	`movzbl -0x20(%rbp),%eax`
	`cmpl $0, %eax`	`cmp $0x0,%eax`
	`jne LBB0_65`	`jne 0x100000d39`

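Here, we see that evaluating an integer as a boolean expression is equivalent to comparing with 0. The comparison instruction is cmpl, cmpb, or cmpq, where the suffix gives the width of the operands being compared (32, 8, or 64 bits respectively). It sets the condition flags according to the result of the comparison, and is typically followed by a conditional jump instruction that reads those flags (je for “jump if equal” or jne for “jump if not equal”).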

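The argument to the jump instruction is a label in the assembly code, which when run becomes the address of the target instruction. Because of the else clause, the first block ends with an unconditional jump (jmp) to the instruction after the if/else statement. The compiler generates slightly different code for if (ca) than for if (ca == 0), choosing je for the former and jne for the latter (and, in the latter case, first widening the char to 32 bits with movsbl). In both cases the conditional jump skips to the else block when the condition is false; the jump condition is inverted only because the two tests are logical opposites.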

To call a function, we set up the arguments to the function, and then execute the callq instruction. In the assembly, we specify the symbol for the target function, and when run this is replaced with the address of the first instruction of the function. This is similar to what we saw for jmp in the conditional.

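The instructions preceding callq will vary greatly, but it is worth taking a quick look at the leaq (load effective address) instruction. While movl -8(%rbp), %eax would load the value of ia into eax, the instruction leaq -8(%rbp), %rax would instead load the address rbp-8 (that is, the address of ia) into rax without reading memory. In the listing above, leaq L_.str(%rip), %rdi places the address of the string constant in rdi, the register that carries the first argument to printf.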

For variable comparisons, we see familiar patterns. One or both variables are loaded into a register, and then we do a comparison and conditional jump as in the previous block of statements.

C	.s file	gdb
`if ( ia == ib ) {`
	`movl -8(%rbp), %eax`	`mov -0x8(%rbp),%eax`
	`cmpl -12(%rbp), %eax`	`cmp -0xc(%rbp),%eax`
	`jne LBB0_20`	`jne 0x100000af9`
`if ( ca == cb ) {`
	`movsbl -29(%rbp), %eax`	`movsbl -0x1d(%rbp),%eax`
	`movsbl -30(%rbp), %ecx`	`movsbl -0x1e(%rbp),%ecx`
	`cmpl %ecx, %eax`	`cmp %ecx,%eax`
	`jne LBB0_26`	`jne 0x100000b49`
`if ( uca == ucb ) {`
	`movzbl -32(%rbp), %eax`	`movzbl -0x20(%rbp),%eax`
	`movzbl -33(%rbp), %ecx`	`movzbl -0x21(%rbp),%ecx`
	`cmpl %ecx, %eax`	`cmp %ecx,%eax`
	`jne LBB0_29`	`jne 0x100000b73`
`if ( la == lb ) {`
	`movq -48(%rbp), %rax`	`mov -0x30(%rbp),%rax`
	`cmpq -56(%rbp), %rax`	`cmp -0x38(%rbp),%rax`
	`jne LBB0_32`	`jne 0x100000b9b`

C	.s file	gdb
`if ( ia == 2 ) {`
	`cmpl $2, -8(%rbp)`	`cmpl $0x2,-0x8(%rbp)`
	`jne LBB0_38`	`jne 0x100000be7`
`if ( ca == 2 ) {`
	`movsbl -29(%rbp), %eax`	`movsbl -0x1d(%rbp),%eax`
	`cmpl $2, %eax`	`cmp $0x2,%eax`
	`jne LBB0_44`	`jne 0x100000c32`
`if ( uca == 2 ) {`
	`movzbl -32(%rbp), %eax`	`movzbl -0x20(%rbp),%eax`
	`cmpl $2, %eax`	`cmp $0x2,%eax`
	`jne LBB0_47`	`jne 0x100000c59`
`if ( la == 2 ) {`
	`cmpq $2, -48(%rbp)`	`cmpq $0x2,-0x30(%rbp)`
	`jne LBB0_50`	`jne 0x100000c7e`
`if ( 2 == ia ) {`
	`movl $2, %eax`	`mov $0x2,%eax`
	`cmpl -8(%rbp), %eax`	`cmp -0x8(%rbp),%eax`
	`jne LBB0_74`	`jne 0x100000dab`
`if ( 2 == ca ) {`
	`movsbl -29(%rbp), %ecx`	`movsbl -0x1d(%rbp),%ecx`
	`movl $2, %eax`	`mov $0x2,%eax`
	`cmpl %ecx, %eax`	`cmp %ecx,%eax`
	`jne LBB0_80`	`jne 0x100000dfe`
`if ( 2 == uca ) {`
	`movzbl -32(%rbp), %ecx`	`movzbl -0x20(%rbp),%ecx`
	`movl $2, %eax`	`mov $0x2,%eax`
	`cmpl %ecx, %eax`	`cmp %ecx,%eax`
	`jne LBB0_83`	`jne 0x100000e29`
`if ( 2 == la ) {`
	`movl $2, %eax`	`mov $0x2,%eax`
	`cmpq -48(%rbp), %rax`	`cmp -0x30(%rbp),%rax`
	`jne LBB0_86`	`jne 0x100000e52`

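Here we see that comparing a variable with a non-zero literal integer value produces code very similar to the comparison with 0. The order of the operands does matter, however: ia == 2 compares the literal directly against the variable, whereas 2 == ia first loads the literal into a register with movl and then compares that register with the variable.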

C	.s file	gdb
`if ( 0 == ia ) {`
	`xorl %eax, %eax`	`xor %eax,%eax`
	`cmpl -8(%rbp), %eax`	`cmp -0x8(%rbp),%eax`
	`jne LBB0_92`	`jne 0x100000ea0`
`if ( 0 == ca ) {`
	`movsbl -29(%rbp), %ecx`	`movsbl -0x1d(%rbp),%ecx`
	`xorl %eax, %eax`	`xor %eax,%eax`
	`cmpl %ecx, %eax`	`cmp %ecx,%eax`
	`jne LBB0_98`	`jne 0x100000eed`
`if ( 0 == uca ) {`
	`movzbl -32(%rbp), %ecx`	`movzbl -0x20(%rbp),%ecx`
	`xorl %eax, %eax`	`xor %eax,%eax`
	`cmpl %ecx, %eax`	`cmp %ecx,%eax`
	`jne LBB0_101`	`jne 0x100000f15`

`if ( 0 == la ) {`
	`xorl %eax, %eax`	`xor %eax,%eax`
	`cmpq -48(%rbp), %rax`	`cmp -0x30(%rbp),%rax`
	`jne LBB0_104`	`jne 0x100000f3b`

The comparison 0 == ia interestingly produces different instructions again. Rather than loading a literal $0 into the register, the compiler uses xorl %eax, %eax to set the value of eax to 0, and compares with that. The result of the xor is stored in the second argument, and xoring a register with itself always yields 0.

ARM (32-bit, AArch32)

C	.s file	gdb
`if ( ia ) {`
	`ldr r3, [fp, #-8]`	`ldr r3, [r11, #-8]`
	`cmp r3, #0`	`cmp r3, #0`
	`beq .L2`	`beq 0x10890`
`printf("x\n");`
	`ldr r0, .L75`	`ldr r0, [pc, #1220]`
	`bl puts`	`bl 0x102e4`
	`b .L3`	`b 0x10898`
`}`
`else {`
	`.L2:`
`printf("y\n");`
	`ldr r0, .L75+4`	`ldr r0, [pc, #1212]`
	`bl puts`	`bl 0x102e4`
`}`
	`.L3:`
`if ( ca ) {`
	`ldrb r3, [fp, #-13]`	`ldrb r3, [r11, #-13]`
	`cmp r3, #0`	`cmp r3, #0`
	`beq .L6`	`beq 0x108d0`
`if ( la ) {`
	`ldr r3, [fp, #-20]`	`ldr r3, [r11, #-20]`
	`cmp r3, #0`	`cmp r3, #0`
	`beq .L10`	`beq 0x10910`
`if ( ca == 0 ) {`
	`ldrb r3, [fp, #-13]`	`ldrb r3, [r11, #-13]`
	`cmp r3, #0`	`cmp r3, #0`
	`bne .L42`	`bne 0x10b28`
`if ( uca == 0 ) {`
	`ldrb r3, [fp, #-14]`	`ldrb r3, [r11, #-14]`
	`cmp r3, #0`	`cmp r3, #0`
	`bne .L44`	`bne 0x10b48`

For the comparisons, we see the same pattern as for AMD64: compare with 0 and then either “branch-if-equal” (beq) or “branch-if-not-equal” (bne), depending on whether we are doing if (ca) or if (ca == 0). The main difference is that, as is often the case on ARM, we first load the variable’s value into a register (with ldr for a word or ldrb for a byte), because cmp operates only on registers and immediate values, not on memory.

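When calling a function, we again set up the arguments (here, loading the address of the string into r0), and then “branch with link” (bl) to the function, which is the equivalent of callq on AMD64. Note that the compiler has replaced the call to printf with a call to puts, which it can do because the format string is a constant ending in a newline with no format specifiers. For the conditional flow, the “branch” (b) instruction is the equivalent of jmp.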

C	.s file	gdb
`if ( ia == ib ) {`
	`ldr r2, [fp, #-8]`	`ldr r2, [r11, #-8]`
	`ldr r3, [fp, #-28]`	`ldr r3, [r11, #-28]`
	`cmp r2, r3`	`cmp r2, r3`
	`bne .L14`	`bne 0x10954`
`if ( ca == cb ) {`
	`ldrb r2, [fp, #-13]`	`ldrb r2, [r11, #-13]`
	`ldrb r3, [fp, #-33]`	`ldrb r3, [r11, #-33]`
	`cmp r2, r3`	`cmp r2, r3`
	`bne .L18`	`bne 0x1099c`
`if ( uca == ucb ) {`
	`ldrb r2, [fp, #-14]`	`ldrb r2, [r11, #-14]`
	`ldrb r3, [fp, #-34]`	`ldrb r3, [r11, #-34]`
	`cmp r2, r3`	`cmp r2, r3`
	`bne .L20`	`bne 0x109c0`
`if ( la == lb ) {`
	`ldr r2, [fp, #-20]`	`ldr r2, [r11, #-20]`
	`ldr r3, [fp, #-40]`	`ldr r3, [r11, #-40]`
	`cmp r2, r3`	`cmp r2, r3`
	`bne .L22`	`bne 0x109e4`

When comparing two integer variables, we see almost the same pattern, but instead of comparing a register with a literal, we load the values into two registers and compare those. For the char types this is identical to what we saw with AMD64; for int and long, AMD64 instead loads only one variable into a register and compares it directly against the other in memory.

Comparing with a non-zero literal looks the same as comparing with zero. We still load the value into a register, and compare that register with the literal.

Reversing the order of the variable and literal has no effect on the generated assembly, in contrast with the equivalent assembly generated for AMD64.

C	.s file	gdb
`if ( ia == 2 ) {`
	`ldr r3, [fp, #-8]`	`ldr r3, [r11, #-8]`
	`cmp r3, #2`	`cmp r3, #2`
	`bne .L26`	`bne 0x10a28`
`if ( ca == 2 ) {`
	`ldrb r3, [fp, #-13]`	`ldrb r3, [r11, #-13]`
	`cmp r3, #2`	`cmp r3, #2`
	`bne .L30`	`bne 0x10a68`
`if ( uca == 2 ) {`
	`ldrb r3, [fp, #-14]`	`ldrb r3, [r11, #-14]`
	`cmp r3, #2`	`cmp r3, #2`
	`bne .L32`	`bne 0x10a88`
`if ( la == 2 ) {`
	`ldr r3, [fp, #-20]`	`ldr r3, [r11, #-20]`
	`cmp r3, #2`	`cmp r3, #2`
	`bne .L34`	`bne 0x10aa8`
`if ( 2 == ia ) {`
	`ldr r3, [fp, #-8]`	`ldr r3, [r11, #-8]`
	`cmp r3, #2`	`cmp r3, #2`
	`bne .L50`	`bne 0x10ba8`
`if ( 2 == ca ) {`
	`ldrb r3, [fp, #-13]`	`ldrb r3, [r11, #-13]`
	`cmp r3, #2`	`cmp r3, #2`
	`bne .L54`	`bne 0x10be8`
`if ( 2 == uca ) {`
	`ldrb r3, [fp, #-14]`	`ldrb r3, [r11, #-14]`
	`cmp r3, #2`	`cmp r3, #2`
	`bne .L56`	`bne 0x10c08`
`if ( 2 == la ) {`
	`ldr r3, [fp, #-20]`	`ldr r3, [r11, #-20]`
	`cmp r3, #2`	`cmp r3, #2`
	`bne .L58`	`bne 0x10c28`