The starting point of this tutorial is the following brainfsck interpreter:
#include <stdio.h>
#include <stdlib.h>

/* Number of one-byte cells on the tape. */
#define TAPE_SIZE 30000
/* Capacity of the stack that tracks currently open '[' loops. */
#define MAX_NESTING 100

/* Execution state handed to the interpreter: the tape plus the two
   callbacks used to implement ',' and '.'. */
typedef struct bf_state
{
  unsigned char* tape;                              /* first cell of the tape */
  unsigned char (*get_ch)(struct bf_state*);        /* invoked for ',' */
  void (*put_ch)(struct bf_state*, unsigned char);  /* invoked for '.' */
} bf_state_t;

/* Print a diagnostic that quotes up to 16 characters of the source at the
   current position, then exit. Expects a variable named `program` to be in
   scope where the macro is used; the exit status is fprintf's return value. */
#define bad_program(s) exit(fprintf(stderr, "bad program near %.16s: %s\n", program, s))

/* Interpret the NUL-terminated brainfsck source `program` against `state`.
   Characters that match no case of the switch are ignored. Returns when the
   terminating NUL is reached; unbalanced or too deeply nested brackets
   terminate the process through bad_program. */
static void bf_interpret(const char* program, bf_state_t* state)
{
  const char* loops[MAX_NESTING]; /* stack: source position just after each open '[' */
  int nloops = 0;                 /* number of entries currently in loops */
  int n;                          /* length of a run of identical instructions */
  int nskip = 0;                  /* non-zero while instructions are being skipped */
  unsigned char* tape_begin = state->tape - 1;            /* one before the first cell */
  unsigned char* ptr = state->tape;                       /* current cell */
  unsigned char* tape_end = state->tape + TAPE_SIZE - 1;  /* last cell */
  for(;;) {
    switch(*program++) {
    case '<':
      /* Consume the whole run of '<' so the move is applied once. */
      for(n = 1; *program == '<'; ++n, ++program);
      if(!nskip) {
        ptr -= n;
        /* Wrap around to the far end of the tape. */
        while(ptr <= tape_begin)
          ptr += TAPE_SIZE;
      }
      break;
    case '>':
      for(n = 1; *program == '>'; ++n, ++program);
      if(!nskip) {
        ptr += n;
        /* Wrap around to the start of the tape. */
        while(ptr > tape_end)
          ptr -= TAPE_SIZE;
      }
      break;
    case '+':
      for(n = 1; *program == '+'; ++n, ++program);
      if(!nskip)
        *ptr += n;
      break;
    case '-':
      for(n = 1; *program == '-'; ++n, ++program);
      if(!nskip)
        *ptr -= n;
      break;
    case ',':
      if(!nskip)
        *ptr = state->get_ch(state);
      break;
    case '.':
      if(!nskip)
        state->put_ch(state, *ptr);
      break;
    case '[':
      if(nloops == MAX_NESTING)
        bad_program("Nesting too deep");
      /* Remember where the loop body starts so ']' can jump back to it. */
      loops[nloops++] = program;
      /* A zero cell means the body is skipped until the matching ']'. */
      if(!*ptr)
        ++nskip;
      break;
    case ']':
      if(nloops == 0)
        bad_program("] without matching [");
      if(*ptr)
        program = loops[nloops-1]; /* cell non-zero: run the body again */
      else
        --nloops;                  /* cell zero: leave the loop */
      if(nskip)
        --nskip;
      break;
    case 0:
      /* End of source. `program` is repointed at "<EOF>" first so that the
         diagnostic printed by bad_program quotes that text. */
      if(nloops != 0)
        program = "<EOF>", bad_program("[ without matching ]");
      return;
    }
  }
}

/* put_ch callback: writes c to stdout. The state argument is unused. */
static void bf_putchar(bf_state_t* s, unsigned char c)
{
  putchar((int)c);
}

/* get_ch callback: reads from stdin. Whatever getchar returns (EOF included)
   is converted to unsigned char. The state argument is unused. */
static unsigned char bf_getchar(bf_state_t* s)
{
  return (unsigned char)getchar();
}

/* Run `program` on a freshly zeroed, stack-allocated tape using the
   stdin/stdout callbacks above. */
static void bf_run(const char* program)
{
  bf_state_t state;
  unsigned char tape[TAPE_SIZE] = {0};
  state.tape = tape;
  state.get_ch = bf_getchar;
  state.put_ch = bf_putchar;
  bf_interpret(program, &state);
}

/* Entry point: expects exactly one argument, the path of a brainfsck source
   file. Returns 1 on a usage error or if the file cannot be opened. */
int main(int argc, char** argv)
{
  if(argc == 2) {
    long sz;
    char* program;
    FILE* f = fopen(argv[1], "r");
    if(!f) {
      fprintf(stderr, "Cannot open %s\n", argv[1]);
      return 1;
    }
    /* Measure the file, then read all of it into a NUL-terminated buffer. */
    /* NOTE(review): the results of fseek, ftell and malloc are not checked,
       and the buffer is never freed before the process exits. */
    fseek(f, 0, SEEK_END);
    sz = ftell(f);
    program = (char*)malloc(sz + 1);
    fseek(f, 0, SEEK_SET);
    /* The terminator goes after the bytes actually read, which may be fewer
       than sz. */
    program[fread(program, 1, sz, f)] = 0;
    fclose(f);
    bf_run(program);
    return 0;
  } else {
    fprintf(stderr, "Usage: %s INFILE.bf\n", argv[0]);
    return 1;
  }
}
Over the course of this tutorial, we'll use DynASM to transform this interpreter into a brainfsck JIT compiler, thereby hopefully making it faster.
To follow along, clone this repository and start from bf_c.c
:
git clone https://github.com/corsix/dynasm-doc.git cd dynasm-doc git submodule update --init cp bf_c.c tutorial.c
The functionality of the starting point can be checked by running the following, which should very slowly render the Mandelbrot set:
gcc -o tutorial tutorial.c ./tutorial mandelbrot.bf
Before the real fun can begin, we need to lay a few pieces of groundwork.
First of all, we need to #include
the DynASM headers:
#include "luajit-2.0/dynasm/dasm_proto.h" #include "luajit-2.0/dynasm/dasm_x86.h"
As described in more detail on the reference page, dasm_proto.h
defines the DynASM API, and dasm_x86.h
contains the implementation
of said API (for x86 / x64).
Next, we'll rename bf_interpret
to bf_compile
and change
its type signature:
static void bf_interpret(const char* program, bf_state_t* state) static void(* bf_compile(const char* program) )(bf_state_t*)
Where previously bf_interpret
accepted both a const char*
and a bf_state_t*
, bf_compile
now accepts just the
const char*
portion, and will return a function pointer to the JIT-compiled
code.
The code which calls bf_interpret
also needs updating at this point:
bf_interpret(program, &state); bf_compile(program)(&state);
With the groundwork done, the next task is creating and initialising a DynASM state.
We'll need a variable of type dasm_State*
to contain the DynASM state, and
two extra variables whose purpose will be explained later. We can also get rid of an
interpreter variable at the same time:
int nskip = 0; dasm_State* d; unsigned npc = 8; unsigned nextpc = 0;
We now reach the first of many DynASM directives, which are instructions to the DynASM preprocessor. In this case, we need to instruct it as to which architecture we're generating machine code for, which will either be x86 or x64:
|.if X64 |.arch x64 |.else |.arch x86 |.endif
Lines starting with a vertical bar will be picked up by the DynASM preprocessor. The
.if
, .else
, and .endif
directives will be handled
by DynASM's prepreprocessor, with semantics similar to C's preprocessor #if
,
#else
, and #endif
. As a result, exactly one .arch
directive will take effect.
Having declared a variable of type dasm_State*
, we need to actually
allocate a dasm_State
to put in it, which is done by calling dasm_init
:
|.section code dasm_init(&d, DASM_MAXSECTION);
Note that as well as a dasm_State**
, dasm_init
also requires
an integer argument, which specifies the number of sections of machine code that'll be
generated. We only need one code section, so we invoke the .section
directive
with one argument, which the DynASM preprocessor will rewrite to #define DASM_MAXSECTION 1
(amongst other things). This is a slightly convoluted way of passing 1
as the
second argument to dasm_init
, but is a good habit in case we need more sections
in the future.
dasm_init
will have allocated a dasm_State
, but won't have fully
initialised it. A few more calls are required to fully initialise the state, the first of
which is dasm_setupglobal
:
|.globals lbl_ void* labels[lbl__MAX]; dasm_setupglobal(&d, labels, lbl__MAX);
The .globals
directive with the argument lbl_
will be rewritten by the
DynASM preprocessor to become an enum
containing several things, one of which will
be lbl__MAX
. This value must be passed to dasm_setupglobal
, along with
an array of void*
of equal extent. We'll make use of this labels
array
much later.
The next call in the initialisation sequence is to dasm_setup
:
|.actionlist bf_actions dasm_setup(&d, bf_actions);
The .actionlist
directive with the argument bf_actions
will be rewritten
by the DynASM preprocessor to become a variable called bf_actions
, and this variable
must be passed to dasm_setup
.
For a lot of use cases, the dasm_State
would be fully initialised at this point.
However, as we'll be making use of dynamic labels, there is one more initialisation call to
make, which is to dasm_growpc
:
dasm_growpc(&d, npc);
We're passing npc
as an argument, which is a variable we declared earlier. Said
variable represents the number of dynamic labels we've allocated, while the related variable
nextpc
represents the number of dynamic labels we've used. These dynamic labels
will come into play when compiling [
and ]
.
Before we start emitting machine code, it is useful to define a few abstractions. The first few abstractions are to give slightly more meaningful names to the registers we'll be using:
Abstraction | Corresponding Interpreter Variable | Definition |
---|---|---|
aState | state | ebp or r12 |
aPtr | ptr | ebx or rbx |
aTapeBegin | tape_begin | esi or rsi or r13 |
aTapeEnd | tape_end | edi or rdi or r14 |
The next group of useful abstractions relate to function calls:
Abstraction | Description |
---|---|
prologue | Set up the stack frame, and set aState from the passed parameter. |
prepcall1 arg1 | Prepare to call a function with one argument, arg1 . |
prepcall2 arg1, arg2 | Prepare to call a function with two arguments, arg1 and arg2 . |
postcall n | Do cleanup after a call to a function with n arguments. |
epilogue | Tear down the stack frame. |
All of these abstractions are defined by means of .define
(for simple substitutions) or .macro
(for more
complex constructions), and have different definitions for each of x86, x64 POSIX, and x64 Windows:
  /* Register aliases and calling-convention macros for x64. */
  |.if X64
    |.define aPtr, rbx
    |.define aState, r12
    /* The tape bounds and the argument registers differ between x64 Windows
       and x64 POSIX. */
    |.if WIN
      |.define aTapeBegin, rsi
      |.define aTapeEnd, rdi
      |.define rArg1, rcx
      |.define rArg2, rdx
    |.else
      |.define aTapeBegin, r13
      |.define aTapeEnd, r14
      |.define rArg1, rdi
      |.define rArg2, rsi
    |.endif
    /* Arguments are passed in registers, so preparing a call is a matter of
       moving values into rArg1 / rArg2. */
    |.macro prepcall1, arg1
      | mov rArg1, arg1
    |.endmacro
    |.macro prepcall2, arg1, arg2
      | mov rArg1, arg1
      | mov rArg2, arg2
    |.endmacro
    /* Nothing to clean up after a call on x64. */
    |.define postcall, .nop
    /* Save the four aliased registers, then take the state pointer from the
       first argument register. */
    /* NOTE(review): the extra `push rax` is presumably there to keep the
       stack 16-byte aligned at call sites -- confirm. */
    |.macro prologue
      | push aPtr
      | push aState
      | push aTapeBegin
      | push aTapeEnd
      | push rax
      | mov aState, rArg1
    |.endmacro
    /* Pops in the reverse order of the prologue's pushes. */
    |.macro epilogue
      | pop rax
      | pop aTapeEnd
      | pop aTapeBegin
      | pop aState
      | pop aPtr
      | ret
    |.endmacro
  /* Register aliases and calling-convention macros for x86. */
  |.else
    |.define aPtr, ebx
    |.define aState, ebp
    |.define aTapeBegin, esi
    |.define aTapeEnd, edi
    /* Arguments are pushed on the stack, last argument first. */
    |.macro prepcall1, arg1
      | push arg1
    |.endmacro
    |.macro prepcall2, arg1, arg2
      | push arg2
      | push arg1
    |.endmacro
    /* Drop the n pushed arguments (4 bytes each) after the call returns. */
    |.macro postcall, n
      | add esp, 4*n
    |.endmacro
    /* After four 4-byte pushes plus the return address, the first stack
       argument is at [esp+20]. */
    |.macro prologue
      | push aPtr
      | push aState
      | push aTapeBegin
      | push aTapeEnd
      | mov aState, [esp+20]
    |.endmacro
    /* NOTE(review): `ret 4` makes the generated function pop its own
       argument; confirm this matches the calling convention the C caller
       uses for the returned function pointer. */
    |.macro epilogue
      | pop aTapeEnd
      | pop aTapeBegin
      | pop aState
      | pop aPtr
      | ret 4
    |.endmacro
  |.endif
Having made all of these architecture and operating system dependent definitions for the DynASM preprocessor, it is useful to check that the architecture and operating system specified to the DynASM preprocessor match the architecture and operating system as known by the C preprocessor, which is done by the following:
||#if ((defined(_M_X64) || defined(__amd64__)) != X64) || (defined(_WIN32) != WIN) #error "Wrong DynASM flags used: pass `-D X64` and/or `-D WIN` to dynasm.lua as appropriate" #endif
Note the line starting with two vertical bars: such lines undergo .define
substitution by the DynASM
prepreprocessor (and can participate in .macro
definitions), but are otherwise unchanged by the DynASM
preprocessor. In particular, if X64
and/or WIN
are defined (to 1
) at DynASM prepreprocessing time,
then they'll be substituted for 1
. If they're not defined at DynASM prepreprocessing time, they'll be
left unchanged, and be substituted for 0
by the C preprocessor.
With all of that done, we're finally ready to emit some machine code.
The first thing we need to emit is a prologue, which replaces some of the initialisation previously done by the interpreter:
unsigned char* tape_begin = state->tape - 1; unsigned char* ptr = state->tape; unsigned char* tape_end = state->tape + TAPE_SIZE - 1; |.type state, bf_state_t, aState dasm_State** Dst = &d; |.code |->bf_main: | prologue | mov aPtr, state->tape | lea aTapeBegin, [aPtr-1] | lea aTapeEnd, [aPtr+TAPE_SIZE-1]
The first item of interest here is the .type
directive, which subsequently allows us to write state->tape
as a shorthand for [aState + offsetof(bf_state_t,tape)]
.
The next line defines a variable called Dst
, and initialises it to &d
. This is done because the
DynASM preprocessor will rewrite the subsequent lines to calls of the form dasm_put(Dst, ...)
, and like the
previous calls we've made to dasm_
functions, the first argument wants to be &d
.
The next line contains a .code
directive. Said directive was introduced by the prior .section code
directive, and states that subsequently emitted machine code should be placed in the code
section (which happens
to be the one and only section we're working with).
After this, we define the global label ->bf_main
. After we've finished emitting machine code, we'll
obtain the address of this global label and turn it into a function pointer.
We then invoke the prologue
macro as defined earlier, which will cause a few instructions to be emitted.
Finally, we have a mov
instruction and two lea
instructions, which directly correspond to the
removed interpreter code. As mentioned, the state->tape
specified as an operand to mov
is
recognised as shorthand for [aState + offsetof(bf_state_t,tape)]
. Note that both offsetof(bf_state_t,tape)
and TAPE_SIZE-1
(part of the lea
operand) are so-called encoding-time constants: DynASM doesn't
understand what they mean, so it defers their computation to the C compiler. Both of these values happen to be compile-time
constants in C, but encoding-time constants don't have to be compile-time constants (we'll see examples of this in just a minute).
We've reached the guts of the interpreter now, and the first job is to replace the interpreter's handling of <
with
the compiler's interpretation:
if(!nskip) { ptr -= n; while(ptr <= tape_begin) ptr += TAPE_SIZE; } | sub aPtr, n%TAPE_SIZE | cmp aPtr, aTapeBegin | ja >1 | add aPtr, TAPE_SIZE |1:
Note that the compiler doesn't have a notion of skipping over code like the interpreter does, so the outer if
is
dropped entirely. After that, ptr -= n;
and some iterations of the subsequent loop have become | sub aPtr, n%TAPE_SIZE
.
Note that n%TAPE_SIZE
is an encoding-time constant which isn't a compile-time constant in C: DynASM still doesn't
understand what the operand means, but in this case the final value of the operand is computed when bf_compile
is running.
After performing some iterations of the loop at compile time by means of %TAPE_SIZE
, there might still be one iteration
to perform at runtime, which corresponds to the cmp
, ja
, and add
instructions. Note that
the syntax >1
jumps forward to the next definition of the local label 1
, which is just after the add
instruction.
A similar transformation happens for >
, but with add
and sub
transposed:
if(!nskip) { ptr += n; while(ptr > tape_end) ptr -= TAPE_SIZE; } | add aPtr, n%TAPE_SIZE | cmp aPtr, aTapeEnd | jbe >1 | sub aPtr, TAPE_SIZE |1:
The next instruction to be rewritten is +
, which is relatively simple:
if(!nskip) *ptr += n; | add byte [aPtr], n
The only notable thing is the presence of the memory size specifier byte
before the memory operand [aPtr]
. As neither
the memory operand nor the immediate operand has a natural operand size, DynASM needs to be explicitly told which size to use. Note that our prior uses of
memory operands didn't require memory size specifiers: lea
instructions don't require them because the memory operands aren't memory
accesses, and mov aPtr, state->tape
didn't require one because the size of the memory operand was inferred to be equal to the size of
the register operand.
The handling of -
is similar:
if(!nskip) *ptr -= n; | sub byte [aPtr], n
The next job involves the logic for ,
(read char) and .
(write char), which are notable because they involve
calling other functions. The first of these is ,
:
if(!nskip) *ptr = state->get_ch(state); | prepcall1 aState | call aword state->get_ch | postcall 1 | mov byte [aPtr], al
Note the invocations of the prepcall1
and postcall
abstractions that we defined earlier. Also note that
state->get_ch
is shorthand for [aState + offsetof(bf_state_t,get_ch)]
courtesy of the earlier .type
directive, and that memory size specifiers are still required when these shorthands are used: the size of the memory operand will
not be automatically inferred to be equal to the size of the named C structure member. The aword
(address-sized word)
specifier refers to either 4 bytes (x86) or 8 bytes (x64).
The transformation of .
is similar:
if(!nskip) state->put_ch(state, *ptr); | movzx r0, byte [aPtr] | prepcall2 aState, r0 | call aword state->put_ch | postcall 2
Note that r0
is used as a register operand: it refers to either eax
(x86) or rax
(x64).
We now reach the really interesting instructions: [
and ]
. The first of these has a rather complex transformation:
loops[nloops++] = program; if(!*ptr) ++nskip; if(program[0] == '-' && program[1] == ']') { program += 2; | xor eax, eax | mov byte [aPtr], al } else { if(nextpc == npc) { npc *= 2; dasm_growpc(&d, npc); } | cmp byte [aPtr], 0 | jz =>nextpc+1 |=>nextpc: loops[nloops++] = nextpc; nextpc += 2; }
First of all, we now recognise the instruction sequence [-]
and emit optimised machine code for it. Having excluded this specific case, the
general case requires two dynamic labels: one for jumping from [
to after ]
(previously done by means of the nskip
variable in the interpreter), and one for jumping from ]
to after [
(previously done by means of the loops
stack).
If the number of dynamic labels we've used equals the number we've allocated, then we call dasm_growpc
in order to allocate some more. We then
emit a cmp
instruction, which does the obvious thing. If the byte at [aPtr]
was zero, we jump to the dynamic label =>nextpc+1
(which we'll subsequently define when we see ]
). After this, we define the dynamic label =>nextpc
(which is what ]
will
jump back to). Note that both nextpc+1
and nextpc
are encoding-time constants.
The second half of the magic comes from the handling of ]
:
if(*ptr) program = loops[nloops-1]; else --nloops; if(nskip) --nskip; --nloops; | cmp byte [aPtr], 0 | jnz =>loops[nloops] |=>loops[nloops]+1:
Note the conditional jump to the dynamic label =>loops[nloops]
(which jumps to the =>nextpc
defined by the corresponding [
),
and the definition of the dynamic label =>loops[nloops]+1
(which is jumped to by jz =>nextpc+1
emitted by the corresponding [
).
Having covered all of the instructions, all that is left is handling the epilogue and extracting a function pointer from DynASM:
return; | epilogue link_and_encode(&d); dasm_free(&d); return (void(*)(bf_state_t*))labels[lbl_bf_main];
The first of these lines invokes the epilogue
macro we defined earlier. The next line calls out to link_and_encode
, which
is a function we'll define in just a minute. We then call dasm_free
, which frees the DynASM state. Finally, we take the labels
array we previously defined and passed to dasm_setupglobal
, index it with lbl_bf_main
(which was defined by .globals lbl_
and corresponds
to the global label ->bf_main
), and cast it to a function pointer.
The link_and_encode
function is defined as follows:
/* Platform headers for allocating memory and changing its protection. */
#if _WIN32
#include <Windows.h>
#else
#include <sys/mman.h>
/* Fall back to MAP_ANON on systems that only provide that spelling. */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif

/* Link the code held in the DynASM state *d, encode it into a newly
   allocated block of memory, and make that block read+execute.
   Returns the start of the block. The block is not freed here. */
static void* link_and_encode(dasm_State** d)
{
  size_t sz;
  void* buf;
  /* dasm_link is handed &sz; sz is then used as the allocation size. */
  dasm_link(d, &sz);
  /* Allocate read-write memory first, so the block is never writable and
     executable at the same time. */
  /* NOTE(review): the results of VirtualAlloc / mmap and of
     VirtualProtect / mprotect are not checked. */
#ifdef _WIN32
  buf = VirtualAlloc(0, sz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
#else
  buf = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
  dasm_encode(d, buf);
  /* Switch the block from read-write to read-execute. */
#ifdef _WIN32
  {DWORD dwOld; VirtualProtect(buf, sz, PAGE_EXECUTE_READ, &dwOld); }
#else
  mprotect(buf, sz, PROT_READ | PROT_EXEC);
#endif
  return buf;
}
The particularly interesting calls are to dasm_link
and dasm_encode
. The remaining calls use operating system functionality
to allocate a block of read-write memory and then convert said block to read-execute. Note that we could have allocated a block of read-write-execute
memory, but it is generally considered bad form to have memory which is writable and executable at the same time.
If you've been following along, your tutorial.c
should now correspond to the following:
/* Sanity check: the -D X64 / -D WIN flags given to dynasm.lua must agree
   with the target the C compiler is building for. */
||#if ((defined(_M_X64) || defined(__amd64__)) != X64) || (defined(_WIN32) != WIN)
#error "Wrong DynASM flags used: pass `-D X64` and/or `-D WIN` to dynasm.lua as appropriate"
#endif
#include <stdio.h>
#include <stdlib.h>
#include "luajit-2.0/dynasm/dasm_proto.h"
#include "luajit-2.0/dynasm/dasm_x86.h"
/* Platform headers for allocating memory and changing its protection. */
#if _WIN32
#include <Windows.h>
#else
#include <sys/mman.h>
/* Fall back to MAP_ANON on systems that only provide that spelling. */
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif

/* Link the code held in the DynASM state *d, encode it into a newly
   allocated block of memory, and make that block read+execute.
   Returns the start of the block. The block is not freed here. */
static void* link_and_encode(dasm_State** d)
{
  size_t sz;
  void* buf;
  /* dasm_link is handed &sz; sz is then used as the allocation size. */
  dasm_link(d, &sz);
  /* Allocate read-write memory first, so the block is never writable and
     executable at the same time. */
  /* NOTE(review): the results of VirtualAlloc / mmap and of
     VirtualProtect / mprotect are not checked. */
#ifdef _WIN32
  buf = VirtualAlloc(0, sz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
#else
  buf = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
  dasm_encode(d, buf);
  /* Switch the block from read-write to read-execute. */
#ifdef _WIN32
  {DWORD dwOld; VirtualProtect(buf, sz, PAGE_EXECUTE_READ, &dwOld); }
#else
  mprotect(buf, sz, PROT_READ | PROT_EXEC);
#endif
  return buf;
}

/* Number of one-byte cells on the tape. */
#define TAPE_SIZE 30000
/* Capacity of the stack that tracks currently open '[' loops. */
#define MAX_NESTING 100

/* Execution state handed to the generated code: the tape plus the two
   callbacks used to implement ',' and '.'. */
typedef struct bf_state
{
  unsigned char* tape;                              /* first cell of the tape */
  unsigned char (*get_ch)(struct bf_state*);        /* called for ',' */
  void (*put_ch)(struct bf_state*, unsigned char);  /* called for '.' */
} bf_state_t;

/* Print a diagnostic that quotes up to 16 characters of the source at the
   current position, then exit. Expects a variable named `program` to be in
   scope where the macro is used; the exit status is fprintf's return value. */
#define bad_program(s) exit(fprintf(stderr, "bad program near %.16s: %s\n", program, s))

/* Compile the NUL-terminated brainfsck source `program` to machine code and
   return a pointer to the generated function, which takes the bf_state_t*
   to run against. Characters that match no case of the switch are ignored.
   Unbalanced or too deeply nested brackets terminate the process through
   bad_program. */
static void(* bf_compile(const char* program) )(bf_state_t*)
{
  unsigned loops[MAX_NESTING]; /* stack: first dynamic label of each open loop */
  int nloops = 0;              /* number of entries currently in loops */
  int n;                       /* length of a run of identical instructions */
  dasm_State* d;
  unsigned npc = 8;            /* dynamic labels allocated so far */
  unsigned nextpc = 0;         /* dynamic labels used so far */
  /* Select the target architecture from the -D X64 flag. */
  |.if X64
  |.arch x64
  |.else
  |.arch x86
  |.endif
  /* One section named `code`; the directive also provides DASM_MAXSECTION. */
  |.section code
  dasm_init(&d, DASM_MAXSECTION);
  /* Global labels are exposed with the lbl_ prefix; their addresses are
     stored in labels[]. */
  |.globals lbl_
  void* labels[lbl__MAX];
  dasm_setupglobal(&d, labels, lbl__MAX);
  |.actionlist bf_actions
  dasm_setup(&d, bf_actions);
  /* Reserve the initial batch of dynamic labels used for '[' and ']'. */
  dasm_growpc(&d, npc);
  /* Register aliases and calling-convention macros for x64. */
  |.if X64
    |.define aPtr, rbx
    |.define aState, r12
    /* The tape bounds and the argument registers differ between x64 Windows
       and x64 POSIX. */
    |.if WIN
      |.define aTapeBegin, rsi
      |.define aTapeEnd, rdi
      |.define rArg1, rcx
      |.define rArg2, rdx
    |.else
      |.define aTapeBegin, r13
      |.define aTapeEnd, r14
      |.define rArg1, rdi
      |.define rArg2, rsi
    |.endif
    /* Arguments are passed in registers, so preparing a call is a matter of
       moving values into rArg1 / rArg2. */
    |.macro prepcall1, arg1
      | mov rArg1, arg1
    |.endmacro
    |.macro prepcall2, arg1, arg2
      | mov rArg1, arg1
      | mov rArg2, arg2
    |.endmacro
    /* Nothing to clean up after a call on x64. */
    |.define postcall, .nop
    /* Save the four aliased registers, then take the state pointer from the
       first argument register. */
    /* NOTE(review): the extra `push rax` is presumably there to keep the
       stack 16-byte aligned at call sites -- confirm. */
    |.macro prologue
      | push aPtr
      | push aState
      | push aTapeBegin
      | push aTapeEnd
      | push rax
      | mov aState, rArg1
    |.endmacro
    /* Pops in the reverse order of the prologue's pushes. */
    |.macro epilogue
      | pop rax
      | pop aTapeEnd
      | pop aTapeBegin
      | pop aState
      | pop aPtr
      | ret
    |.endmacro
  /* Register aliases and calling-convention macros for x86. */
  |.else
    |.define aPtr, ebx
    |.define aState, ebp
    |.define aTapeBegin, esi
    |.define aTapeEnd, edi
    /* Arguments are pushed on the stack, last argument first. */
    |.macro prepcall1, arg1
      | push arg1
    |.endmacro
    |.macro prepcall2, arg1, arg2
      | push arg2
      | push arg1
    |.endmacro
    /* Drop the n pushed arguments (4 bytes each) after the call returns. */
    |.macro postcall, n
      | add esp, 4*n
    |.endmacro
    /* After four 4-byte pushes plus the return address, the first stack
       argument is at [esp+20]. */
    |.macro prologue
      | push aPtr
      | push aState
      | push aTapeBegin
      | push aTapeEnd
      | mov aState, [esp+20]
    |.endmacro
    /* NOTE(review): `ret 4` makes the generated function pop its own
       argument; confirm this matches the calling convention the C caller
       uses for the returned function pointer. */
    |.macro epilogue
      | pop aTapeEnd
      | pop aTapeBegin
      | pop aState
      | pop aPtr
      | ret 4
    |.endmacro
  |.endif

  /* Allow state->field as an operand, addressed relative to aState. */
  |.type state, bf_state_t, aState

  /* The code generated from the `|` lines refers to a variable named Dst. */
  dasm_State** Dst = &d;
  |.code
  /* Entry point of the generated function: load the tape pointer and derive
     the wrap-around bounds (one before the first cell, and the last cell). */
  |->bf_main:
  | prologue
  | mov aPtr, state->tape
  | lea aTapeBegin, [aPtr-1]
  | lea aTapeEnd, [aPtr+TAPE_SIZE-1]
  for(;;) {
    switch(*program++) {
    case '<':
      /* Consume the whole run of '<' and emit a single pointer adjustment. */
      for(n = 1; *program == '<'; ++n, ++program);
      /* n%TAPE_SIZE keeps the step below one tape length, so at most one
         wrap-around correction is needed at run time. */
      | sub aPtr, n%TAPE_SIZE
      | cmp aPtr, aTapeBegin
      | ja >1
      | add aPtr, TAPE_SIZE
      |1:
      break;
    case '>':
      for(n = 1; *program == '>'; ++n, ++program);
      | add aPtr, n%TAPE_SIZE
      | cmp aPtr, aTapeEnd
      | jbe >1
      | sub aPtr, TAPE_SIZE
      |1:
      break;
    case '+':
      for(n = 1; *program == '+'; ++n, ++program);
      | add byte [aPtr], n
      break;
    case '-':
      for(n = 1; *program == '-'; ++n, ++program);
      | sub byte [aPtr], n
      break;
    case ',':
      /* Call state->get_ch(state) and store the low byte of the result. */
      | prepcall1 aState
      | call aword state->get_ch
      | postcall 1
      | mov byte [aPtr], al
      break;
    case '.':
      /* Call state->put_ch(state, current cell). */
      | movzx r0, byte [aPtr]
      | prepcall2 aState, r0
      | call aword state->put_ch
      | postcall 2
      break;
    case '[':
      if(nloops == MAX_NESTING)
        bad_program("Nesting too deep");
      if(program[0] == '-' && program[1] == ']') {
        /* "[-]" is special-cased: store zero to the cell directly. */
        program += 2;
        | xor eax, eax
        | mov byte [aPtr], al
      } else {
        /* Each loop uses two dynamic labels: =>nextpc marks the start of the
           body, =>nextpc+1 marks the point just after the matching ']'. */
        if(nextpc == npc) {
          npc *= 2;
          dasm_growpc(&d, npc);
        }
        | cmp byte [aPtr], 0
        | jz =>nextpc+1
        |=>nextpc:
        loops[nloops++] = nextpc;
        nextpc += 2;
      }
      break;
    case ']':
      if(nloops == 0)
        bad_program("] without matching [");
      /* Pop the loop, then jump back to its body while the cell is non-zero
         and define the label that the matching '[' jumps to. */
      --nloops;
      | cmp byte [aPtr], 0
      | jnz =>loops[nloops]
      |=>loops[nloops]+1:
      break;
    case 0:
      /* End of source. `program` is repointed at "<EOF>" first so that the
         diagnostic printed by bad_program quotes that text. */
      if(nloops != 0)
        program = "<EOF>", bad_program("[ without matching ]");
      | epilogue
      /* The return value of link_and_encode is not used here; the entry
         point is taken from the labels array instead. */
      link_and_encode(&d);
      dasm_free(&d);
      return (void(*)(bf_state_t*))labels[lbl_bf_main];
    }
  }
}

/* put_ch callback: writes c to stdout. The state argument is unused. */
static void bf_putchar(bf_state_t* s, unsigned char c)
{
  putchar((int)c);
}

/* get_ch callback: reads from stdin. Whatever getchar returns (EOF included)
   is converted to unsigned char. The state argument is unused. */
static unsigned char bf_getchar(bf_state_t* s)
{
  return (unsigned char)getchar();
}

/* Compile `program`, then run the generated code on a freshly zeroed,
   stack-allocated tape using the stdin/stdout callbacks above. */
static void bf_run(const char* program)
{
  bf_state_t state;
  unsigned char tape[TAPE_SIZE] = {0};
  state.tape = tape;
  state.get_ch = bf_getchar;
  state.put_ch = bf_putchar;
  bf_compile(program)(&state);
}

/* Entry point: expects exactly one argument, the path of a brainfsck source
   file. Returns 1 on a usage error or if the file cannot be opened. */
int main(int argc, char** argv)
{
  if(argc == 2) {
    long sz;
    char* program;
    FILE* f = fopen(argv[1], "r");
    if(!f) {
      fprintf(stderr, "Cannot open %s\n", argv[1]);
      return 1;
    }
    /* Measure the file, then read all of it into a NUL-terminated buffer. */
    /* NOTE(review): the results of fseek, ftell and malloc are not checked,
       and the buffer is never freed before the process exits. */
    fseek(f, 0, SEEK_END);
    sz = ftell(f);
    program = (char*)malloc(sz + 1);
    fseek(f, 0, SEEK_SET);
    /* The terminator goes after the bytes actually read, which may be fewer
       than sz. */
    program[fread(program, 1, sz, f)] = 0;
    fclose(f);
    bf_run(program);
    return 0;
  } else {
    fprintf(stderr, "Usage: %s INFILE.bf\n", argv[0]);
    return 1;
  }
}
If you've not been following that closely, you can reach the same state by doing:
git clone https://github.com/corsix/dynasm-doc.git cd dynasm-doc git submodule update --init cp bf_dynasm.c tutorial.c
In order to compile tutorial.c
, we first need to run it through the DynASM preprocessor. Said preprocessor is written in Lua, so we'll
first compile a minimal Lua interpreter:
gcc -o minilua luajit-2.0/src/host/minilua.c
With this interpreter in place, we can run the DynASM preprocessor:
./minilua luajit-2.0/dynasm/dynasm.lua -o tutorial.posix64.c -D X64 tutorial.c
With preprocessing done, we can now invoke a C compiler:
gcc -o tutorial tutorial.posix64.c
We can then run the resulting executable, which should fairly quickly render the Mandelbrot set:
./tutorial mandelbrot.bf