Embedded Linux - Aarch64 system hanging after writing to large dynamically allocated buffer

I'm working on an aarch64 system with 512 MB of RAM, running Linux kernel 4.14, and I'm attempting to implement a firmware update mechanism for a USB component, but the update image is around 100 MB. So, I need to be able to buffer a good percentage of the total RAM available to the system, plus reserve a little extra for decompression, processing, and so on.

The issue is that allocating that memory and then accessing it seems to cause a system hang or a kernel panic on my device.

I can see through /proc/meminfo that I have enough available memory to satisfy the allocation (around 400MB), and so the malloc for that memory actually succeeds, but when I attempt to access the newly allocated buffer, it will freeze my system and sometimes generate this kernel panic:

[  659.987365] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000004
[  659.987365] 
[  659.996536] CPU: 0 PID: 1 Comm: procd Not tainted 4.14.241 #0
[  660.002291] Hardware name: dev (DT)
[  660.006130] Call trace:
[  660.008589]  dump_backtrace+0x0/0x168
[  660.012259]  show_stack+0x14/0x20
[  660.015581]  dump_stack+0xa4/0xc8
[  660.018900]  panic+0x13c/0x298
[  660.021960]  do_exit+0x1a8/0x8d8
[  660.025193]  SyS_exit_group+0x0/0x10
[  660.028776]  get_signal+0x4dc/0x570
[  660.032270]  do_signal+0x54c/0x5b8
[  660.035676]  do_notify_resume+0x80/0x248
[  660.039604]  work_pending+0x8/0x10
[  660.043012] Kernel Offset: disabled
[  660.046504] CPU features: 0x0002000
[  660.049994] Memory Limit: none
[  660.053054] Rebooting in 3 seconds..

Both of the following code examples cause the issue I'm experiencing:

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>

#include <sys/mman.h>

/*
 * Shutdown flag: set to 1 by handle_exit_signal() and polled by main().
 *
 * Declared as volatile sig_atomic_t rather than volatile bool because that
 * is the only plain object type the C standard guarantees can be written
 * from an asynchronous signal handler without undefined behavior.
 */
volatile sig_atomic_t done = 0;

/*
 * Signal handler that records an exit request.
 *
 * signum: the delivered signal number; intentionally unused, since every
 *         signal routed here is handled the same way.
 *
 * Only stores to the flag; no other work is done in signal context.
 */
static void handle_exit_signal(int signum) {
        (void)signum;
        done = 1;
}

#define BUF_SIZE 100000000

/*
 * mmap variant of the reproduction.
 *
 * Installs handle_exit_signal for SIGINT and SIGTERM, maps BUF_SIZE bytes of
 * private anonymous memory with MAP_LOCKED, fills the whole mapping with
 * 0xFF, then spins until the handler sets `done` before unmapping.
 *
 * Returns 0 on a clean exit, or -1 if the mapping could not be created.
 * argc and argv are not used.
 */
int main(int argc, char *argv[]) {
        const int prot_bits = PROT_READ | PROT_WRITE;
        const int map_bits = MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED;

        signal(SIGINT, handle_exit_signal);
        signal(SIGTERM, handle_exit_signal);

        /* The author reports that this mapping always triggers the problem. */
        char *region = mmap(NULL, BUF_SIZE, prot_bits, map_bits, -1, 0);
        if (MAP_FAILED == region) {
                printf("unable to map memory resources: %s\n", strerror(errno));
                return -1;
        }

        /* Write to every byte of the mapping. */
        memset(region, 0xFF, BUF_SIZE);

        /* Busy-wait until a SIGINT/SIGTERM handler flips the flag. */
        while (!done) {
        }

        munmap(region, BUF_SIZE);
        return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>

#include <sys/mman.h>

/*
 * Shutdown flag: set to 1 by handle_exit_signal() and polled by main().
 *
 * Declared as volatile sig_atomic_t rather than volatile bool because that
 * is the only plain object type the C standard guarantees can be written
 * from an asynchronous signal handler without undefined behavior.
 */
volatile sig_atomic_t done = 0;

/*
 * Signal handler that records an exit request.
 *
 * signum: the delivered signal number; intentionally unused, since every
 *         signal routed here is handled the same way.
 *
 * Only stores to the flag; no other work is done in signal context.
 */
static void handle_exit_signal(int signum) {
        (void)signum;
        done = 1;
}

#define BUF_SIZE 100000000

/*
 * malloc variant of the reproduction.
 *
 * Installs handle_exit_signal for SIGINT and SIGTERM, allocates BUF_SIZE
 * bytes from the heap, fills the whole buffer with 0xFF, then spins until
 * the handler sets `done` before freeing the buffer.
 *
 * Returns 0 on a clean exit, or -1 if the allocation failed.
 * argc and argv are not used.
 */
int main(int argc, char *argv[]) {
        signal(SIGINT, handle_exit_signal);
        signal(SIGTERM, handle_exit_signal);

        char *buffer = malloc(BUF_SIZE);
        if (buffer == NULL) {
                printf("unable to allocate memory\n");
                return -1;
        }

        /* The author reports the hang occurs here, on the first write pass. */
        memset(buffer, 0xFF, BUF_SIZE);

        /* Busy-wait until a SIGINT/SIGTERM handler flips the flag. */
        while (!done) {
        }

        free(buffer);
        return 0;
}

I've also tried disabling memory overcommit (vm.overcommit_memory=2 and vm.overcommit_ratio=100), but that didn't seem to have any effect.

Is there something relatively obvious that I'm missing when trying to allocate large buffers on an embedded platform? Or could the issue be something more subtle?

Any help is appreciated.



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source