Emulating RISC-V Bare-Metal on macOS with QEMU — A Retrospective

I recently ported a project to the 32-bit RISC-V ISA and enabled cross-compilation for it.
Several issues came up during the process that I want to highlight. The host is macOS Sequoia 15.5, and the target platform is RV32IM emulated with QEMU.

Installing toolchain


    brew install riscv-software-src/riscv/riscv-gnu-toolchain
    brew install qemu

Even though the toolchain's name (riscv64-unknown-elf) suggests 64-bit output, it lets you select the machine word size and generate a 32-bit binary.
The following binary utilities will be used for compilation and inspection:


    riscv64-unknown-elf-gcc
    riscv64-unknown-elf-g++
    riscv64-unknown-elf-nm
    riscv64-unknown-elf-objdump
  

Configure Compiler from CMake

I created gcc-riscv-none-eabi.cmake to hold all the reusable settings specific to RISC-V compilation.
It can be included by other CMakeLists.txt files to centralize configuration, reduce redundancy, and ensure consistent toolchain behavior across modules.


    # Toolchain module: cross-compile for RISCV32I (target) from a macOS host.

    # Process this file at most once per configure run, however often it is included.
    include_guard(GLOBAL)

    # Bare-metal target: no operating system ("Generic") on a 32-bit RISC-V processor.
    set(CMAKE_SYSTEM_PROCESSOR          "riscv32")
    set(CMAKE_SYSTEM_NAME               Generic)

    # Declare both compilers as GNU and mark them as forced, so CMake accepts
    # them as-is instead of running its own compiler detection.
    set(CMAKE_C_COMPILER_ID             GNU)
    set(CMAKE_C_COMPILER_FORCED         TRUE)
    set(CMAKE_CXX_COMPILER_ID           GNU)
    set(CMAKE_CXX_COMPILER_FORCED       TRUE)

    # Locate the RISC-V GCC driver; if it is missing, try to install the
    # toolchain through Homebrew and abort the configure step on failure.
    find_program(RISCV_GCC riscv64-unknown-elf-gcc)

    if(NOT RISCV_GCC)
        message(STATUS "riscv64-unknown-elf-gcc not found. Attempting to install via Homebrew...")
        # Use the tap-qualified formula (the same one as the manual install
        # step) so Homebrew can resolve it even if the tap was never added.
        execute_process(
            COMMAND brew install riscv-software-src/riscv/riscv-gnu-toolchain
            RESULT_VARIABLE BREW_RESULT
        )
        # RESULT_VARIABLE holds the exit code, or an error string if brew could
        # not be started; anything other than 0 is treated as a failure.
        if(NOT BREW_RESULT EQUAL 0)
            message(FATAL_ERROR "Failed to install riscv-gnu-toolchain using Homebrew.")
        endif()
    else()
        message(STATUS "Found riscv64-unknown-elf-gcc: ${RISCV_GCC}")
    endif()

    # Define tool chain path.
    # Prefer the directory in which find_program() located the compiler, so
    # Homebrew prefixes other than the Apple Silicon default (for example
    # /usr/local on Intel Macs) work too; otherwise fall back to the default.
    if(RISCV_GCC)
        get_filename_component(TOOLCHAIN_PATH_MACOS "${RISCV_GCC}" DIRECTORY)
    else()
        set(TOOLCHAIN_PATH_MACOS        /opt/homebrew/bin)
    endif()
    set(TOOLCHAIN_PREFIX                "${TOOLCHAIN_PATH_MACOS}/riscv64-unknown-elf-")

    # The C driver also assembles; g++ is used as the link driver.
    set(CMAKE_C_COMPILER                ${TOOLCHAIN_PREFIX}gcc)
    set(CMAKE_ASM_COMPILER              ${CMAKE_C_COMPILER})
    set(CMAKE_CXX_COMPILER              ${TOOLCHAIN_PREFIX}g++)
    set(CMAKE_LINKER                    ${TOOLCHAIN_PREFIX}g++)
    set(CMAKE_OBJCOPY                   ${TOOLCHAIN_PREFIX}objcopy)
    set(CMAKE_SIZE                      ${TOOLCHAIN_PREFIX}size)

    # Set output executable suffix to .elf for ASM, C, and C++ targets.
    set(CMAKE_EXECUTABLE_SUFFIX_ASM     ".elf")
    set(CMAKE_EXECUTABLE_SUFFIX_C       ".elf")
    set(CMAKE_EXECUTABLE_SUFFIX_CXX     ".elf")

    # Have try_compile() checks build a static library rather than an executable.
    set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)

    # Architecture/ABI selection, shared by the compile and link steps.
    set(TARGET_FLAGS "-march=rv32imac -mabi=ilp32")
    # Directory containing this module; the linker script is looked up in it.
    set(LINKER_SCRIPT_PATH ${CMAKE_CURRENT_LIST_DIR})

    # C flags, step 1: target selection.
    string(APPEND CMAKE_C_FLAGS " ${TARGET_FLAGS}")

    # C flags, step 2: extra warnings (-Wall, -Wextra, -Wpedantic) and one
    # section per data object / function (-fdata-sections, -ffunction-sections)
    # so the linker can garbage-collect unused ones.
    string(APPEND CMAKE_C_FLAGS " -Wall -Wextra -Wpedantic -fdata-sections -ffunction-sections")

    # C flags, step 3: per-build-type optimization and debug info.
    # Debug: -O0 -g3. Release: -Os -g0.
    if(CMAKE_BUILD_TYPE MATCHES Debug)
        string(APPEND CMAKE_C_FLAGS " -O0 -g3")
    endif()
    if(CMAKE_BUILD_TYPE MATCHES Release)
        string(APPEND CMAKE_C_FLAGS " -Os -g0")
    endif()

    # Assembler flags: the C flags plus preprocessing of assembly sources and
    # dependency-file generation (-MMD, -MP).
    set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp -MMD -MP")

    # C++ flags: the C flags with RTTI, exceptions, and thread-safe statics
    # switched off to keep the embedded binary small.
    set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fno-rtti -fno-exceptions -fno-threadsafe-statics")

    # Link flags: target selection, linker script, map file and section garbage
    # collection, the C/math library group, and a memory-usage report.
    set(CMAKE_C_LINK_FLAGS "${TARGET_FLAGS}")
    string(APPEND CMAKE_C_LINK_FLAGS " -T \"${LINKER_SCRIPT_PATH}/riscv_qemu_virt.ld\"")
    string(APPEND CMAKE_C_LINK_FLAGS " -Wl,-Map=${CMAKE_PROJECT_NAME}.map -Wl,--gc-sections")
    string(APPEND CMAKE_C_LINK_FLAGS " -Wl,--start-group -lc -lm -Wl,--end-group")
    string(APPEND CMAKE_C_LINK_FLAGS " -Wl,--print-memory-usage")
    # C++ links reuse the C link flags and add the C++ runtime libraries.
    set(CMAKE_CXX_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} -Wl,--start-group -lstdc++ -lsupc++ -Wl,--end-group")
  

-march=rv32imac: machine architecture is 32-bit RISC-V with extensions (i: base integer, m: multiply/divide, a: atomics, c: compressed instructions)

-mabi=ilp32: application binary interface (ABI) in which int, long, and pointers are all 32 bits wide — required to match the libraries shipped with the toolchain

-T \"${LINKER_SCRIPT_PATH}/riscv_qemu_virt.ld\": specify a custom linker script

Including RISC-V Compiler Configuration in CMakeLists.txt

In CMakeLists.txt, the CMake script above is included conditionally, based on the ARCH_DEFINED value the user passes on the cmake command line.


  ...
  # Pick the toolchain module and the linker script for the architecture given
  # via -DARCH_DEFINED=<arch> on the cmake command line.
  if (ARCH_DEFINED STREQUAL "ARMV7M")
    include(${CMAKE_CURRENT_LIST_DIR}/../../kernel/arch/armv7m/gcc-arm-none-eabi.cmake)
    set(LINKER_SCRIPT_PATH ${CMAKE_CURRENT_LIST_DIR}/../../kernel/arch/armv7m/STM32F767ZITx_FLASH.ld)
  elseif (ARCH_DEFINED STREQUAL "RV32I")
    include(${CMAKE_CURRENT_LIST_DIR}/../../kernel/arch/rv32i/gcc-riscv-none-eabi.cmake)
    set(LINKER_SCRIPT_PATH ${CMAKE_CURRENT_LIST_DIR}/../../kernel/arch/rv32i/riscv_qemu_virt.ld)
  else()
    # Unrecognized value: message() without a mode only prints the text;
    # configuration continues without any of the toolchain modules above.
    message("'ARCH_DEFINED' is not recognized: ${ARCH_DEFINED}")
  endif()
  ...
  

The target architecture is passed to CMake:


    cmake .. -DARCH_DEFINED=RV32I -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    make -j12
  

Resolving Linker Warning


    /opt/homebrew/Cellar/riscv-gnu-toolchain/main/lib/gcc/riscv64-unknown-elf/14.2.0/../../../../riscv64-unknown-elf/bin/ld:/Users/lhan/Projects/YesRTOS/example/multi_thread/../../kernel/arch/rv32i/riscv_qemu_virt.ld:7: warning: redeclaration of memory region `RAM'
    /opt/homebrew/Cellar/riscv-gnu-toolchain/main/lib/gcc/riscv64-unknown-elf/14.2.0/../../../../riscv64-unknown-elf/bin/ld:/Users/lhan/Projects/YesRTOS/example/multi_thread/../../kernel/arch/rv32i/riscv_qemu_virt.ld:8: warning: redeclaration of memory region `FLASH'

Unexpected warning despite RAM and FLASH being defined only once in the linker script.

Strategy: enable the linker's verbose mode. I added the flag below to CMAKE_CXX_LINK_FLAGS.

    ...
    # -Wl,<option> forwards <option> to the linker; --verbose makes ld report
    # details such as the linker script files it opens.
    set(CMAKE_CXX_LINK_FLAGS "${CMAKE_C_LINK_FLAGS} -Wl,--verbose")
    ...
  

    ...
    [ 81%] Built target LibYesRTOSKernel
    [ 90%] Building CXX object CMakeFiles/multi_thread.dir/main.cpp.obj
    [100%] Linking CXX executable multi_thread.elf
    GNU ld (GNU Binutils) 2.43.1
      Supported emulations:
       elf64lriscv
       elf32lriscv
       elf64briscv
       elf32briscv
    opened script file /Users/lhan/Projects/YesRTOS/kernel/arch/rv32i/riscv_qemu_virt.ld
    opened script file /Users/lhan/Projects/YesRTOS/example/multi_thread/../../kernel/arch/rv32i/riscv_qemu_virt.ld
    /opt/homebrew/Cellar/riscv-gnu-toolchain/main/lib/gcc/riscv64-unknown-elf/14.2.0/../../../../riscv64-unknown-elf/bin/ld:/Users/lhan/Projects/YesRTOS/example/multi_thread/../../kernel/arch/rv32i/riscv_qemu_virt.ld:7: warning: redeclaration of memory region `RAM'
    /opt/homebrew/Cellar/riscv-gnu-toolchain/main/lib/gcc/riscv64-unknown-elf/14.2.0/../../../../riscv64-unknown-elf/bin/ld:/Users/lhan/Projects/YesRTOS/example/multi_thread/../../kernel/arch/rv32i/riscv_qemu_virt.ld:8: warning: redeclaration of memory region `FLASH'
    using external linker script: /Users/lhan/Projects/YesRTOS/example/multi_thread/../../kernel/arch/rv32i/riscv_qemu_virt.ld
  

The linker messages show that riscv_qemu_virt.ld is opened twice, through two different paths within the project.

Solution: remove the duplicate -T switch that had been added to the link command by mistake.

Loading executable on QEMU


    qemu-system-riscv32 -machine virt -bios multi_thread.elf -nographic  -gdb tcp::1234 -s
  

From https://github.com/qemu/qemu/blob/master/hw/riscv/virt.c#L78-L99, 0x80000000 address is the location where the first instruction is to be executed.
In the linker script, same start address is mapped to contain .text, .data, .rodata, etc.

Clash of Memory Region on QEMU


    qemu-system-riscv32: Some ROM regions are overlapping
    These ROM regions might have been loaded by direct user request or by default.
    They could be BIOS/firmware images, a guest kernel, initrd or some other file loaded into guest memory.
    Check whether you intended to load all this guest code, and whether it has been built to load to the correct addresses.

    The following two regions overlap (in the memory address space):
      multi_thread.elf ELF program header segment 1 (addresses 0x0000000000000000 - 0x0000000000004238)
      mrom.reset (addresses 0x0000000000001000 - 0x0000000000001028)
  

From the error message, it looks like something has been allocated to the address range 0x0000000000000000 - 0x0000000000004238.
However, in the linker script, I have allocated everything to 0x80000000.
The question is: what is being placed in this clashing segment?



  OUTPUT_ARCH("riscv")
  ENTRY(_start)

  /* Memory layout: a single 128K region named FLASH starting at 0x80000000,
     the address at which the QEMU virt machine begins executing the image. */
  MEMORY
  {
    /* FLASH   (xrw) : ORIGIN = 0x20000000, LENGTH = 10K */
    FLASH (wrx)  : ORIGIN = 0x80000000, LENGTH = 128K
  }

  /* Stack and heap configuration */
  _estack           = ORIGIN(FLASH) + LENGTH(FLASH);  /* End of FLASH */
  _alloc_heap_size  = 0x100;                     /* 0x100 = 256 bytes of heap */
  _alloc_stack_size = 0x800;                     /* 0x800 = 2 KiB reserved after the heap */

  /* Sections layout */
  /* Sections layout: every output section is placed in the FLASH region, in
     the order listed below. */
  SECTIONS
  {
    /* Code, followed by read-only data and .eh_frame; _etext marks the end. */
    .text :
    {
      . = ALIGN(4);
      KEEP(*(.init)) KEEP(*(.fini))
      *(.text) *(.text*)
      *(.rodata) *(.rodata*)
      *(.eh_frame)
      . = ALIGN(4);
      _etext = .;
    } > FLASH

    /* Initialized data, bounded by _sdata/_edata. It is placed directly in the
       FLASH region; no separate load address is specified here. */
    .data :
    {
      . = ALIGN(4);
      _sdata = .;
      *(.data) *(.data*)
      . = ALIGN(4);
      _edata = .;
    } > FLASH

    /* Uninitialized data, bounded by _sbss/_ebss (aliased as __bss_start and
       __bss_end__). */
    .bss :
    {
      . = ALIGN(4);
      _sbss = .; __bss_start = _sbss;
      *(.bss) *(.bss*) *(COMMON)
      . = ALIGN(4);
      _ebss = .; __bss_end__ = _ebss;
    } > FLASH

    /* Heap of _alloc_heap_size bytes (_ld_start_heap.._ld_end_heap), followed
       by _alloc_stack_size reserved bytes. end/_end mark the heap start. */
    ._user_heap_stack :
    {
      . = ALIGN(8);
      PROVIDE(end = .);
      PROVIDE(_end = .);
      _ld_start_heap = .;
      . = . + _alloc_heap_size;
      _ld_end_heap = .;
      . = . + _alloc_stack_size;
      . = ALIGN(8);
    } > FLASH

    /* Drop metadata sections that are not wanted in the output image. */
    /DISCARD/ :
    {
      *(.comment)
      *(.note*)
      *(.eh_frame_hdr)
      *(.gcc_except_table)
      *(.riscv.attributes)
    }
  }
  
To verify the address assignment of key symbols, I used nm to inspect the binary.
Strategy: From the executable, inspect memory layout leveraging nm:

    riscv64-unknown-elf-nm -n multi_thread.elf
  

This prints the symbol table as address, type, and name, sorted by address (-n).
In the type column, A marks an absolute symbol, T a global symbol in the text (code) section, B an uninitialized global in the BSS section, and b an uninitialized local symbol in BSS.


    00000000 A _sidata
    00000100 A _alloc_heap_size
    00000800 A _alloc_stack_size
    80000000 T _Z15thread0_routinev
    8000000a T _Z15thread1_routinev
    80000014 T _Z15thread2_routinev
    8000001e T _Z15thread3_routinev
    80000028 T main
    800000d2 T _ZN7YesRTOS20PreemptFIFOScheduler4initEv
    8000014e T _ZN7YesRTOS20PreemptFIFOScheduler10add_threadEPNS_6ThreadEj
    8000019e T _ZN7YesRTOS20PreemptFIFOScheduler5startEv
    80000242 T _ZN7YesRTOS20PreemptFIFOScheduler13schedule_nextEv
    ...
    80004d50 B errno
    80004d54 B __malloc_max_total_mem
    80004d58 B __malloc_max_sbrked_mem
    80004d5c B __malloc_top_pad
    80004d60 b heap_end.0
    80004d68 B _ld_start_heap
    80004e68 B _ld_end_heap
    80020000 T _estack
  

_sidata, _alloc_heap_size, and _alloc_stack_size are all absolute symbols whose values fall inside the clashing region.


    00000000 A _sidata
    00000100 A _alloc_heap_size
    00000800 A _alloc_stack_size

Strategy: Dump .map file to further investigate memory layout.

Both _ld_start_heap and _ld_end_heap are part of the ._user_heap_stack memory, addresses 0x80004d68 and 0x80004e68 are after 0x80000000 as expected.

However, the _sidata resides on address 0x00000000, in the clash region.
The _sidata = LOADADDR(.data) syntax comes from the linker script I have implemented.

The root cause is that _sidata in the linker script tries to retrieve the address of .data before .data is declared — a classic forward reference error in linker scripts.


    ...
    /* NOTE(review): this is the faulty version. _sidata is assigned from
       LOADADDR(.data) before .data is declared, while .data takes its load
       address from AT(_sidata), so the two definitions appear to depend on
       each other. nm reports _sidata as absolute symbol 0x00000000. */
    _sidata = LOADADDR(.data);
    .data : AT(_sidata)
    {
      . = ALIGN(4);
      _sdata = .;
      *(.data) *(.data*)
      . = ALIGN(4);
      _edata = .;
    } > FLASH
    ...
    

Solution: _sidata is removed from linker script, or it could be declared within .data section.

Finally Attaching GDB


  gdb multi_thread.elf
  

  target remote localhost:1234
  b _start
  b main
  

  lhan@Mac build % gdb multi_thread.elf
  GNU gdb (GDB) 16.3
  Copyright (C) 2024 Free Software Foundation, Inc.
  License GPLv3+: GNU GPL version 3 or later 
  This is free software: you are free to change and redistribute it.
  There is NO WARRANTY, to the extent permitted by law.
  Type "show copying" and "show warranty" for details.
  This GDB was configured as "--host=aarch64-apple-darwin24.4.0 --target=x86_64-apple-darwin20".
  Type "show configuration" for configuration details.
  For bug reporting instructions, please see:
  .
  Find the GDB manual and other documentation resources online at:
      .

  For help, type "help".
  Type "apropos word" to search for commands related to "word"...
  Reading symbols from multi_thread.elf...
  (gdb) target remote localhost:1234
  Remote debugging using localhost:1234
  0x00001000 in ?? ()
  (gdb) b _start
  Breakpoint 1 at 0x800009d0: file /Users/lhan/Projects/YesRTOS/kernel/arch/rv32i/startup_rv32i.s, line 6.
  (gdb) b main
  Breakpoint 2 at 0x80000034: file /Users/lhan/Projects/YesRTOS/example/multi_thread/main.cpp, line 82.
  (gdb) c
  Continuing.

  Breakpoint 1, _start () at /Users/lhan/Projects/YesRTOS/kernel/arch/rv32i/startup_rv32i.s:6
  6         la sp, _estack
  (gdb) si
  0x800009d4 in _start () at /Users/lhan/Projects/YesRTOS/kernel/arch/rv32i/startup_rv32i.s:6
  6         la sp, _estack
  (gdb) si
  7         call main
  (gdb) si
  main () at /Users/lhan/Projects/YesRTOS/example/multi_thread/main.cpp:81
  81      int main() {
  (gdb) q
  A debugging session is active.

          Inferior 1 [process 1] will be detached.

  Quit anyway? (y or n) n
  Not confirmed.
  (gdb) si
  0x8000002a      81      int main() {
  (gdb) disas
  Dump of assembler code for function main():
     0x80000028 <+0>:     addi    sp,sp,-80
  => 0x8000002a <+2>:     sw      ra,76(sp)
     0x8000002c <+4>:     sw      s0,72(sp)
     0x8000002e <+6>:     addi    s0,sp,80
     0x80000030 <+8>:     lui     t0,0xfffff
     0x80000032 <+10>:    add     sp,sp,t0
     0x80000034 <+12>:    addi    a4,s0,-1056
     0x80000038 <+16>:    lui     a5,0x80000
     0x8000003c <+20>:    mv      a2,a5
     0x80000040 <+24>:    li      a1,0
     0x80000042 <+26>:    mv      a0,a4
     0x80000044 <+28>:    jal     0x8000033e <_ZN7YesRTOS6ThreadC2EmPFvvE>
     0x80000046 <+30>:    addi    a5,s0,-1056
     0x8000004a <+34>:    li      a1,4
     0x8000004c <+36>:    mv      a0,a5
     0x8000004e <+38>:    jal     0x8000014e <_ZN7YesRTOS20PreemptFIFOScheduler10add_threadEPNS_6ThreadEj>
     0x80000050 <+40>:    lui     a5,0xfffff
     0x80000052 <+42>:    addi    a5,a5,2016 # 0xfffff7e0
     0x80000056 <+46>:    addi    a5,a5,-16
     0x80000058 <+48>:    add     a4,a5,s0
     0x8000005c <+52>:    lui     a5,0x80000
     0x80000060 <+56>:    addi    a2,a5,10 # 0x8000000a <_Z15thread1_routinev>
     0x80000064 <+60>:    li      a1,1
     0x80000066 <+62>:    mv      a0,a4
  

Based on initial observations in GDB, the executable is now running correctly on the RISC-V emulated target.

Lessons Learned