This is a quick note on using the BPF_F_MMAPABLE map flag, which was added at the end of 2019 by Andrii Nakryiko (see the kernel patch). I noticed that there weren’t a lot of tutorials on it, so I thought about writing this one; then I realized I should have looked at the kernel selftests, because it’s obviously tested there, but anyway.

Pre-requisites

This uses clang to compile the BPF progs, bpftool to generate the skeleton, and libbpf to load them; I relied on the excellent libbpf-bootstrap repo. In short, for compiling the BPF progs you’ll need clang 11+, then you’ll need bpftool to generate the BPF skeletons, and finally you’ll also need a “recent” version of libbpf, which depends on libelf and zlib. See more details and instructions in the libbpf-bootstrap deps.

Be aware that some commonly used distros ship an old version of libbpf; Ubuntu 22.04, for example, ships v0.5.0. See more details on libbpf distribution.

Write a BPF program

Let’s write a simple program that we will run on demand via BPF_PROG_TEST_RUN. We’ll add an array map with a single 16-byte entry. Note that its actual memory footprint will be larger since, in order for the map to be mmapable, it needs to occupy at least PAGE_SIZE bytes.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* License string, placed in the "license" ELF section of the BPF object. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";

/*
 * buf_map: array map holding a single entry, keyed by a __u32, whose value
 * is a 16-byte char buffer. The map is declared in the ".maps" section.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, char[16]);
	/* the flag this note is about: the map is created as mmapable */
	__uint(map_flags, BPF_F_MMAPABLE);
} buf_map SEC(".maps");

/*
 * BPF program that looks up the only entry of buf_map, copies a greeting
 * into it when its first byte is zero, and traces the buffer content with
 * bpf_printk. A failed lookup is traced as well. Always returns 0.
 */
SEC("raw_tp/does-not-exist")
int test_prog()
{
	__u32 key = 0;
	char *value = bpf_map_lookup_elem(&buf_map, &key);

	if (!value) {
		bpf_printk("failed to open buf");
		return 0;
	}

	/* only write when the first byte is zero, i.e. nobody filled it yet */
	if (value[0] == 0) {
		__builtin_memcpy(value, "hello from bpf",
				 sizeof("hello from bpf"));
	}

	bpf_printk("buf: %s", value);
	return 0;
}

Compile this program using clang

clang -O2 -g -target bpf -Wall -Werror -c mmap.bpf.c -o mmap.bpf.o

Write a loader with libbpf

The next step is to use bpftool to generate the libbpf skeleton. Note that you can now download a static build of bpftool from the releases page if you are not happy with your distribution package (hehe).

bpftool gen skeleton mmap.bpf.o > mmap.skel.h

Now write the user space program that will:

  • Load the BPF prog;
  • Memory map the BPF map;
  • Run the BPF prog;
  • Print the content of the map from the memory mapping;
  • Modify the content of the map from the memory mapping.
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2021 Sartura
 * Based on minimal.c by Facebook */

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/user.h>
#include <bpf/bpf.h>

#include "mmap.skel.h"

/*
 * Print callback registered with libbpf_set_print(): writes the formatted
 * message to stderr whatever its level, and returns vfprintf's result.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format,
			   va_list args)
{
	int written;

	(void)level;	/* the level is not used for filtering */
	written = vfprintf(stderr, format, args);
	return written;
}

/*
 * Dump the first 16 bytes of map_data to stdout on a single line: every
 * byte as two hex digits followed by a space, then the same 16 bytes as
 * raw characters, then a newline.
 *
 * map_data must point to at least 16 readable bytes.
 */
void print_buf(char *map_data)
{
	enum { BUF_LEN = 16 };
	/*
	 * Read the bytes as unsigned: a plain char may be signed, in which
	 * case a byte >= 0x80 would be sign-extended and printed by %02x as
	 * eight hex digits (e.g. "ffffff80") instead of two.
	 */
	const unsigned char *bytes = (const unsigned char *)map_data;

	for (size_t i = 0; i < BUF_LEN; i++) {
		printf("%02x ", (unsigned int)bytes[i]);
	}
	for (size_t i = 0; i < BUF_LEN; i++) {
		printf("%c", bytes[i]);
	}
	printf("\n");
}

/*
 * Loader entry point. In order, it:
 *   - opens, loads and verifies the BPF skeleton;
 *   - memory-maps buf_map (one page, read/write, shared);
 *   - prints the buffer, runs the BPF prog, prints the buffer again;
 *   - overwrites the buffer through the mapping and prints it;
 *   - runs the BPF prog a second time.
 *
 * Returns EXIT_FAILURE when the skeleton cannot be loaded. Otherwise
 * returns the negation of the last recorded error code, which is 0 when
 * every step succeeded.
 */
int main(int argc, char **argv)
{
	struct mmap_bpf *skel;
	struct bpf_test_run_opts opts = {
		// required, or else bpf_prog_test_run_opts will fail
		.sz = sizeof(struct bpf_test_run_opts),
	};
	void *map_data = MAP_FAILED;
	int map_fd;
	int prog_fd;
	/* starts at 0 so every path reaching cleanup returns a defined value */
	int err = 0;

	/* set up libbpf errors and debug info callback */
	libbpf_set_print(libbpf_print_fn);

	/* open load and verify the bpf prog */
	skel = mmap_bpf__open_and_load();
	if (!skel) {
		perror("failed to open BPF skeleton");
		return EXIT_FAILURE;
	}

	/* mmap the array map */
	map_fd = bpf_map__fd(skel->maps.buf_map);
	map_data = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
			MAP_SHARED, map_fd, 0);
	if (map_data == MAP_FAILED) {
		/* save errno before perror() gets a chance to change it */
		err = -errno;
		perror("mmap failed");
		goto cleanup;
	}

	print_buf(map_data);

	/* run the bpf prog */
	prog_fd = bpf_program__fd(skel->progs.test_prog);
	err = bpf_prog_test_run_opts(prog_fd, &opts);
	if (err) {
		perror("failed to run BPF prog");
		goto cleanup;
	}

	print_buf(map_data);

	/* modify the content of the map through the memory mapping */
	memcpy(map_data, "hello from user", sizeof("hello from user"));

	print_buf(map_data);

	/* run the bpf prog again */
	err = bpf_prog_test_run_opts(prog_fd, &opts);
	if (err) {
		perror("failed to run BPF prog");
		goto cleanup;
	}

cleanup:
	/* release the mapping only if it was actually established */
	if (map_data != MAP_FAILED)
		munmap(map_data, PAGE_SIZE);
	mmap_bpf__destroy(skel);
	return -err;
}

You can compile your loader with:

gcc mmap_loader.c -lbpf -o mmap_loader

For “reasons”, in my situation I ended up recompiling the latest libbpf and using the static build for my loader. It’s quite straightforward to do: you can clone the libbpf mirror from github.com/libbpf/libbpf and then run make from the src folder. Then you pass the libbpf.a file to your compiler along with the -lelf and -lz lib args.

gcc mmap_loader.c  ../libbpf/src/libbpf.a -lelf -lz -o mmap_loader

Run it and be amazed that it works

First in another terminal, open the trace_pipe to get the bpf_printk from the BPF prog:

sudo cat /sys/kernel/tracing/trace_pipe

And then run your loader:

sudo ./mmap_loader

On the pipe side you should see something similar to:

     mmap_loader-14933   [003] .....  8469.272544: bpf_trace_printk: buf: hello from bpf
     mmap_loader-14933   [003] .....  8469.272556: bpf_trace_printk: buf: hello from user

And from your loader, first a bunch of libbpf output on stderr:

libbpf: loading object 'mmap_bpf' from buffer
libbpf: elf: section(3) raw_tp/does-not-exist, size 408, link 0, flags 6, type=1
libbpf: sec 'raw_tp/does-not-exist': found program 'test_prog' at insn offset 0 (0 bytes), code size 51 insns (408 bytes)
libbpf: elf: section(4) .relraw_tp/does-not-exist, size 48, link 28, flags 40, type=9
libbpf: elf: section(5) license, size 13, link 0, flags 3, type=1
libbpf: license of mmap_bpf is Dual BSD/GPL
libbpf: elf: section(6) .maps, size 40, link 0, flags 3, type=1
libbpf: elf: section(7) .rodata, size 27, link 0, flags 2, type=1
libbpf: elf: section(8) .rodata.str1.1, size 15, link 0, flags 32, type=1
libbpf: elf: section(18) .BTF, size 1065, link 0, flags 0, type=1
libbpf: elf: section(20) .BTF.ext, size 240, link 0, flags 0, type=1
libbpf: elf: section(28) .symtab, size 432, link 1, flags 0, type=2
libbpf: looking for externs among 18 symbols...
libbpf: collected 0 externs total
libbpf: map 'buf_map': at sec_idx 6, offset 0.
libbpf: map 'buf_map': found type = 2.
libbpf: map 'buf_map': found key [8], sz = 4.
libbpf: map 'buf_map': found value [12], sz = 16.
libbpf: map 'buf_map': found max_entries = 1.
libbpf: map 'buf_map': found map_flags = 0x400.
libbpf: map 'mmap_bpf.rodata' (global data): at sec_idx 7, offset 0, flags 80.
libbpf: map 1 is "mmap_bpf.rodata"
libbpf: map '.rodata.str1.1' (global data): at sec_idx 8, offset 0, flags 80.
libbpf: map 2 is ".rodata.str1.1"
libbpf: sec '.relraw_tp/does-not-exist': collecting relocation for section(3) 'raw_tp/does-not-exist'
libbpf: sec '.relraw_tp/does-not-exist': relo #0: insn #4 against 'buf_map'
libbpf: prog 'test_prog': found map 0 (buf_map, sec 6, off 0) for insn #4
libbpf: sec '.relraw_tp/does-not-exist': relo #1: insn #8 against '.rodata'
libbpf: prog 'test_prog': found data map 1 (mmap_bpf.rodata, sec 7, off 0) for insn 8
libbpf: sec '.relraw_tp/does-not-exist': relo #2: insn #44 against '.rodata'
libbpf: prog 'test_prog': found data map 1 (mmap_bpf.rodata, sec 7, off 0) for insn 44
libbpf: object 'mmap_bpf': failed (-22) to create BPF token from '/sys/fs/bpf', skipping optional step...
libbpf: map 'buf_map': created successfully, fd=3
libbpf: map 'mmap_bpf.rodata': created successfully, fd=4
libbpf: map '.rodata.str1.1': created successfully, fd=5

And the stdout output:

00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
68 65 6c 6c 6f 20 66 72 6f 6d 20 62 70 66 00 00 hello from bpf
68 65 6c 6c 6f 20 66 72 6f 6d 20 75 73 65 72 00 hello from user

Conclusion

Now you can combine that with vmsplice(2) and potentially BPF arena to have efficient zero-copy outputs from your BPF progs.