Records (`struct`s)

Section 15.3 Records (`struct`s)

While an array is useful for grouping homogeneous data items that are of the same data type, a record (struct in C/C++) is used for grouping heterogeneous data items, which may be of the same or different data types. For example, an array is probably better for storing a list of test scores in a program that works with the \(i^{th}\) test score, but a struct might be better for storing the coordinates of a point on an \(x - y\) graph.

The data elements in a struct are usually called fields. Accessing a field in a struct also requires two address-related items:

The name of the struct, and
The name of the field.

Consider the C program in Listing 15.3.1.

/* structField1.c
 * Allocates two structs and assigns a value to each field
 * in each struct.
 * 2017-09-29: Bob Plantz
 */

#include <stdio.h>

/* Struct type with two char fields separated by an int field.
 * In the compiler output (Listing 15.3.2) the three fields sit at
 * offsets 0, 4, and 8 from the start of each struct, so the bytes
 * following each char field are unused.
 */
struct theTag {  /* "template" for struct */
  char aByte;
  int anInt;
  char anotherByte;
};

/* Allocates two struct variables on the stack, assigns a value to
 * every field of each one, and prints all six field values.
 * Always returns 0.
 */
int main(void)
{
  /* two separate variables of the same struct type */
  struct theTag x;
  struct theTag y;

  /* a field is accessed as: variable name, dot, field name */
  x.aByte = 'a';
  x.anInt = 123;
  x.anotherByte = 'b';
  y.aByte = '1';
  y.anInt = 456;
  y.anotherByte = '2';

  /* one format string, then the fields of x followed by those of y */
  printf("x: %c, %i, %c\ny: %c, %i, %c\n",
          x.aByte, x.anInt, x.anotherByte,
          y.aByte, y.anInt, y.anotherByte);
  return 0;
}

Listing 15.3.1. Two struct variables. (C)

The first thing we do is to define a new struct data type:

struct theTag {  /* Define new struct type */
  char aByte;
  int anInt;
  char anotherByte;
};

The tag name is a C identifier created by the programmer. Since this is a nonsense program, we have simply used theTag. Once a tag has been defined, we create variables of this new data type in the usual way:

struct theTag x;
struct theTag y;

The tag name is not required, but then you would have to define the fields for each struct variable:

struct {
  char aByte;
  int anInt;
  char anotherByte;
} x;
struct {
  char aByte;
  int anInt;
  char anotherByte;
} y;

This is entirely equivalent to the use of the tag, but it greatly increases the chances of making an error. Also, as you will see in the following sections, defining a tag is necessary for passing a struct as an argument to a function.

Assignment to each of the three fields in the “x” struct is accomplished by giving the name of the struct variable, followed by a dot (.), followed by the name of the field:

x.aByte = 'a';
x.anInt = 123;
x.anotherByte = 'b';

The assembly language generated by the compiler for the program in Listing 15.3.1 is shown in Listing 15.3.2.

        .arch   armv6
        .file   "structField1.c"
        .section  .rodata
        .align  2
.LC0:
        .ascii  "x: %c, %i, %c\012y: %c, %i, %c\012\000"
        .text
        .align  2
        .global main
        .syntax unified
        .arm
        .fpu vfp
        .type   main, %function
main:
        @ args = 0, pretend = 0, frame = 24
        @ frame_needed = 1, uses_anonymous_args = 0
        @@ Annotations marked @@ were added by hand; single-@ comments
        @@ are the compiler's. Fields of x are at fp-16, fp-12, fp-8;
        @@ fields of y are at fp-28, fp-24, fp-20.
        push    {fp, lr}
        add     fp, sp, #4
        sub     sp, sp, #40
        mov     r3, #97
        strb    r3, [fp, #-16]    @@ x.aByte = 'a';
        mov     r3, #123
        str     r3, [fp, #-12]    @@ x.anInt = 123;
        mov     r3, #98
        strb    r3, [fp, #-8]     @@ x.anotherByte = 'b';
        mov     r3, #49
        strb    r3, [fp, #-28]    @@ y.aByte = '1';
        mov     r3, #456
        str     r3, [fp, #-24]    @@ y.anInt = 456;
        mov     r3, #50
        strb    r3, [fp, #-20]    @@ y.anotherByte = '2';
        @@ load the six field values for the call to printf
        ldrb    r3, [fp, #-16]  @ zero_extendqisi2
        mov     ip, r3            @@ ip = x.aByte
        ldr     r2, [fp, #-12]    @@ x.anInt
        ldrb    r3, [fp, #-8]   @ zero_extendqisi2
        mov     lr, r3            @@ lr = x.anotherByte
        ldrb    r3, [fp, #-28]  @ zero_extendqisi2
        mov     r1, r3            @@ r1 = y.aByte (temporarily)
        ldr     r3, [fp, #-24]    @@ r3 = y.anInt (temporarily)
        ldrb    r0, [fp, #-20]  @ zero_extendqisi2
        @@ the three fields of y are passed on the stack
        str     r0, [sp, #8]      @@ y.anotherByte
        str     r3, [sp, #4]      @@ y.anInt
        str     r1, [sp]          @@ y.aByte
        @@ format string and the three fields of x go in r0 - r3
        mov     r3, lr            @@ x.anotherByte
        mov     r1, ip            @@ x.aByte
        ldr     r0, .L3           @@ formatting string
        bl      printf
        mov     r3, #0
        mov     r0, r3
        sub     sp, fp, #4
        @ sp needed
        pop     {fp, pc}
.L4:
        .align  2
.L3:
        .word   .LC0
        .ident  "GCC: (Raspbian 6.3.0-18+rpi1) 6.3.0 20170516"

Listing 15.3.2. Two struct variables. (gcc asm)

We can see that the compiler adds the (negative) offset of each field in each struct to the address in the frame pointer to access the respective field. For example, x.aByte = 'a'; is implemented with:

mov     r3, #97
strb    r3, [fp, #-16]

You can see the relative offsets in the pictorial view of the stack frame in Figure 15.3.3 for the program in Listing 15.3.1. The offset for each field in each struct is relative to the fp. Shaded areas represent unused memory.

Figure 15.3.3. Stack frame of the program in Listing 15.3.1 showing the location of the fields in each struct. The offsets are relative to the fp.

I take a somewhat different approach to accessing the fields of each struct in my assembly language solution, Listing 15.3.4.

@ structField2.s
@ Allocates two structs and assigns a value to each field
@ in each struct, then displays the values.
@ 2017-09-29: Bob Plantz

@ Define my Raspberry Pi
        .cpu    cortex-a53
        .fpu    neon-fp-armv8
        .syntax unified         @ modern syntax

@ Constants for assembler
@ Field offsets are relative to the start of a struct; x and y are
@ the offsets of each struct's first byte relative to fp. The two
@ structs are 12 bytes apart, and locals is the number of bytes
@ main subtracts from sp to hold both of them.
        .equ    aChar,0         @ offsets within
        .equ    anInt,4         @       each
        .equ    anotherChar,8   @          struct
        .equ    y,-36           @ y struct, offset from fp
        .equ    x,-24           @ x struct, offset from fp
        .equ    locals,24       @ space for the structs (bytes)

@ Constant program data
@ NUL-terminated (.asciz) text. main loads the address of a string
@ through the address words at the end of the file and passes it
@ to writeStr.
        .section .rodata
        .align  2
@ heading for the x struct's fields
displayX:
        .asciz        "x fields:\n"
@ heading for the y struct's fields
displayY:
        .asciz        "y fields:\n"
@ labels printed in front of each field value
dispAChar:
        .asciz        "         aChar = "
dispAnInt:
        .asciz        "         anInt = "
dispOtherChar:
        .asciz        "   anotherChar = "

@ The program
@ main: fills the fields of two structs in the stack frame, then
@ displays each struct under its own heading. Returns 0 in r0.
@
@ Register use:
@   r4  address of the struct currently being accessed. It is saved
@       and restored here; it is assumed to survive the calls to
@       writeStr, putChar, putDecInt, and newLine (defined
@       elsewhere; confirm they preserve r4).
@   r0  argument to the output functions; return value
@   r1  value being stored into a field
@
@ Stack frame (offsets from fp):
@   fp        saved lr
@   fp - 4    saved fp
@   fp - 8    saved r4
@   fp - 12   unused (keeps sp 8-byte aligned)
@   fp - 24   x struct (12 bytes)
@   fp - 36   y struct (12 bytes)
        .text
        .align  2
        .global main
        .type   main, %function
main:
        sub     sp, sp, 16      @ space for saving regs
                                @ (keeping 8-byte sp align)
        str     r4, [sp, 4]     @ save r4
        str     fp, [sp, 8]     @      fp
        str     lr, [sp, 12]    @      lr
        add     fp, sp, 12      @ set our frame pointer
        sub     sp, sp, locals  @ for the structs
        
@ fill the x struct
        add     r4, fp, x       @ address of x struct
        mov     r1, '1
        strb    r1, [r4, aChar] @ x.aChar = '1';
        mov     r1, 456
        str     r1, [r4, anInt] @ x.anInt = 456;
        mov     r1, '2
        strb    r1, [r4, anotherChar]  @ x.anotherChar = '2';

@ fill the y struct
        add     r4, fp, y       @ address of y struct
        mov     r1, 'a
        strb    r1, [r4, aChar] @ y.aChar = 'a';
        mov     r1, 123
        str     r1, [r4, anInt] @ y.anInt = 123;
        mov     r1, 'b
        strb    r1, [r4, anotherChar]  @ y.anotherChar = 'b';

@ display x struct
        add     r4, fp, x       @ address of x struct
        ldr     r0, displayXaddr  @ "x fields:" heading
        bl      writeStr
        ldr     r0, dispACharAddr @ display aChar
        bl      writeStr
        ldrb    r0, [r4, aChar]
        bl      putChar
        bl      newLine
        ldr     r0, dispAnIntAddr @ display anInt
        bl      writeStr
        ldr     r0, [r4, anInt]
        bl      putDecInt
        bl      newLine
        ldr     r0, dispOtherCharAddr @ display anotherChar
        bl      writeStr
        ldrb    r0, [r4, anotherChar]
        bl      putChar
        bl      newLine

@ display y struct
        add     r4, fp, y       @ address of y struct
        ldr     r0, displayYaddr  @ "y fields:" heading
        bl      writeStr
        ldr     r0, dispACharAddr @ display aChar
        bl      writeStr
        ldrb    r0, [r4, aChar]
        bl      putChar
        bl      newLine
        ldr     r0, dispAnIntAddr @ display anInt
        bl      writeStr
        ldr     r0, [r4, anInt]
        bl      putDecInt
        bl      newLine
        ldr     r0, dispOtherCharAddr @ display anotherChar
        bl      writeStr
        ldrb    r0, [r4, anotherChar]
        bl      putChar
        bl      newLine

        mov     r0, 0           @ return 0;
        add     sp, sp, locals  @ deallocate local var
        ldr     r4, [sp, 4]     @ restore r4
        ldr     fp, [sp, 8]     @         fp
        ldr     lr, [sp, 12]    @         lr
        add     sp, sp, 16      @         sp
        bx      lr              @ return
        
        .align  2
@ addresses of messages
@ (kept in .text so main can load them with pc-relative ldr)
displayXaddr:
        .word   displayX
displayYaddr:
        .word   displayY
dispACharAddr:
        .word   dispAChar
dispAnIntAddr:
        .word   dispAnInt
dispOtherCharAddr:
        .word   dispOtherChar

Listing 15.3.4. Two struct variables. (prog asm)

Instead of computing the address of each field, I use a register to point to the beginning of the struct. Then I can simply use the offset of each field, relative to this pointer, to access the field:

add     r4, fp, x       @ address of x struct
mov     r1, '1
strb    r1, [r4, aChar] @ x.aChar = '1';
mov     r1, 456
str     r1, [r4, anInt] @ x.anInt = 456;
mov     r1, '2
strb    r1, [r4, anotherChar]  @ x.anotherChar = '2';

Working from the address of a struct is especially useful when the struct is large. The address is what is typically used to access a struct that is an element of an array of structs, or to pass a struct as an argument to a function.

Section 15.3 Records (structs)

Section 15.3 Records (`struct`s)