; bfc : a brainfuck compiler
; Copyright (C) 2018 Weber Yann
;
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 3 of the License, or
; any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program.  If not, see <http://www.gnu.org/licenses/>.
;
; A brainfuck compiler :
; Build : nasm -felf64 bfc.asm && ld -s -melf_x86_64 bfc.o -o bfc
;
; ./bfc [-h] [-o a.out] FILE.bf
; Options :
;	-h print usage and exit
;	-o indicate the file to create default is a.out
;	FILE.bf the brainfuck source file to compile
;
; Syntax : NASM (Intel operand order). Target : Linux x86-64, raw syscalls
; (rax = number, args in rdi, rsi, rdx, r10, r8, r9; the kernel clobbers
; rcx and r11). No libc; routines use their own register conventions,
; documented in each header.

[bits 64]

%use smartalign
ALIGNMODE k8

; Not referenced anywhere in this file: the offsets actually used by the
; emitted code are the bf_start.lbl_incresize / bf_start.lbl_decresize
; equ labels below.
%define lbl_incresize 0x2e
%define lbl_decresize 0x50

%define MAP_INC_SIZE 0x1000
%define MAP_INC_MASK 0x0FFF
%define BFMEM_INIT_SZ MAP_INC_SIZE

; Virtual address of the first emitted code byte : load address 0x400000
; plus the 0x80 bytes of elf_head.
%define ELF_CODE_OFFSET 0x00400080

; Linux x86-64 syscall numbers
%define SYS_READ	0x00
%define SYS_WRITE	0x01
%define SYS_OPEN	0x02
%define SYS_CLOSE	0x03
%define SYS_MMAP	0x09
%define SYS_BRK		0x0c
%define SYS_MREMAP	0x19
%define SYS_EXIT	0x3c

%define MREMAP_MAYMOVE	0x1

section .data

;-----------------------------------------------------------------------
; Everything from bf_start to bf_exit is *template* machine code. It is
; never executed inside bfc : compile_bf copies it byte for byte into the
; code map, which is then written out as the body of the generated ELF.
;
; Register roles in the generated program :
;	rsi	pointer to the current cell (map_addr + offset)
;	rdi	offset of the current cell in the map
;	r14	map length in bytes
;	r15	map address
;-----------------------------------------------------------------------
bf_start:
	jmp .start

.mremap:
	; Grow the cell map.
	;	rbx is resize min size
	;	rdi is map ptr offset
	;	r14 is map_len
	;	r15 is map_addr
	; Returns :
	;	r15 new map_addr
	;	r14 new map_len
	;	rdi map ptr offset (unchanged)
	;	rsi map ptr (recomputed from the new address)
	; Clobbers rax, rcx, rdx, r10, r11. Exits with status 2 on failure.
	push rdi
	; round the request up to a whole number of MAP_INC_SIZE steps
	xor edx, edx
.loop_min_remap:
	add rdx, MAP_INC_SIZE
	jc .erremap			; request does not fit in 64 bits
	cmp rdx, rbx
	jb .loop_min_remap

	mov rsi, r14			; old_size : length BEFORE growing
	add r14, rdx			; new map_len
	jc .erremap			; overflow, too much mem !!!!
	mov rax, SYS_MREMAP
	mov rdi, r15			; old_address
	mov rdx, r14			; new_size
	mov r10, MREMAP_MAYMOVE		; the kernel may relocate the map
	syscall
	test rax, rax
	jle .erremap
	mov r15, rax
	; restore rsi & rdi
	pop rdi
	mov rsi, r15
	add rsi, rdi
	ret
.erremap:
	mov rax, SYS_EXIT
	mov rdi, 0x2
	syscall

align 8
.lbl_incresize: equ $ - bf_start
.incresize:
	; Resize the map on ptr increment
	;	rbx is increment count
	;	rdi is map ptr offset
	;	r14 is map_len
	;	r15 is map_addr
	call .mremap
	ret

align 8
.lbl_decresize: equ $ - bf_start
.decresize:
	; Resize the map on ptr decrement
	;	rbx is decrement count
	;	rdi is map ptr offset
	;	r14 is map_len
	;	r15 is map_addr
	; The map can only grow at its end, so the existing cells are
	; shifted up by the growth and the freed bytes at the start are
	; zeroed. rdi and rsi are moved by the same amount.
	; Clobbers rax, rbx, rcx (plus what .mremap clobbers).
	push r14			; old map len
	call .incresize
	pop rcx				; rcx = old len = number of bytes to move
	mov rbx, r14			; new len
	push rsi
	push rdi
	lea rsi, [r15 + rcx - 1]	; last byte of the old data
	lea rdi, [r15 + rbx - 1]	; last byte of the grown map
	sub rbx, rcx			; rbx = new_len - old_len : the shift
	std				; copy backward : regions overlap, dst > src
	rep movsb
	; set first map bytes to 0
	lea rdi, [r15 + rbx - 1]
	mov rcx, rbx
	xor eax, eax
	rep stosb
	cld				; do not leave DF set for the caller
	pop rdi
	pop rsi
	; update map_ptr & offset given the shift
	add rdi, rbx
	add rsi, rbx
	ret

.errmap:
	mov rax, SYS_EXIT
	mov rdi, 0x1
	syscall

.start:
	; map init
	mov rax, SYS_MMAP
	xor rdi, rdi
	mov rsi, BFMEM_INIT_SZ		; len
	mov r14, rsi
	mov rdx, (0x1|0x2)		; PROT_READ | PROT_WRITE
	mov r10, (0x2 | 0x20)		; flags MAP_PRIVATE | MAP_ANONYMOUS
	mov r8, -1			; fd
	xor r9, r9
	syscall
	test rax, rax
	jle .errmap
	; Sets BF runtime :
	; r14 is map len
	mov r15, rax			; r15 is map addr
	mov rdi, (BFMEM_INIT_SZ / 2)	; rdi is ptr idx : start in the middle
	mov rsi, r15
	add rsi, rdi			; rsi is bf ptr
align 8
bf_start_sz: equ $ - bf_start

;-----------------------------------------------------------------------
; Instruction snippets.
; The first instruction of every snippet MUST be a `mov r64, imm64`
; (2 opcode bytes followed by the 8-byte immediate). After copying a
; snippet, compile_bf overwrites the 8 bytes at snippet+2 to "pass" a
; parameter : the repeat count, or the jump target for '[' and ']'.
;-----------------------------------------------------------------------

bf_decptr:
	; '<' : move the pointer left. rbx is dec count (patched).
	mov rbx, strict qword 0x1
	push rbx
	cmp rdi, rbx
	jae .end			; enough room on the left
	mov rax, ELF_CODE_OFFSET + bf_start.lbl_decresize
	call rax
.end:
	pop rbx
	sub rsi, rbx
	sub rdi, rbx
bf_decptr_sz: equ $ - bf_decptr

bf_incptr:
	; '>' : move the pointer right. rbx is inc count (patched).
	mov rbx, strict qword 0x1
	push rbx
	add rbx, rdi			; rbx = offset after the move
	cmp rbx, r14
	jb .end				; still inside the map : no resize
	mov rbx, [rsp]			; grow by at least the inc count
	mov rax, ELF_CODE_OFFSET + bf_start.lbl_incresize
	call rax
.end:
	pop rbx
	add rsi, rbx
	add rdi, rbx
bf_incptr_sz: equ $ - bf_incptr

bf_incval:
	; '+' : add the low byte of the patched count to the cell
	mov rbx, strict qword 0x1
	mov al, [rsi]
	add al, bl
	mov [rsi], al
bf_incval_sz: equ $ - bf_incval

bf_decval:
	; '-' : subtract the low byte of the patched count from the cell
	mov rbx, strict qword 0x1
	mov al, [rsi]
	sub al, bl
	mov [rsi], al
bf_decval_sz: equ $ - bf_decval

bf_readval:
	; ',' : read one byte from stdin into the current cell, repeated
	; rbx (patched) times. Each read targets the same cell so that
	; neighbouring cells are never written. On end of file the cell
	; is set to 0.
	mov rbx, strict qword 0x1
	push rdi
	push rsi
.loop_read:
	xor eax, eax			; read
	xor edi, edi			; stdin
	mov edx, 1			; 1 chr; rsi still points at the cell
	syscall
	test rax, rax
	jnz .next
	mov byte [rsi], 0		; EOF
.next:
	dec rbx
	jnz .loop_read
	pop rsi
	pop rdi
bf_readval_sz: equ $ - bf_readval

bf_writeval:
	; '.' : write the current cell on stdout, rcx (patched) times
	mov rcx, strict qword 0x1
	push rdi
	push rsi
.loop_write:
	push rcx			; syscall clobbers rcx
	mov rax, SYS_WRITE
	mov rdi, rax			; stdout
	mov rsi, [rsp+8]		; saved cell pointer
	mov rdx, 1			; 1 chr
	syscall
	pop rcx
	dec rcx
	jnz .loop_write
	pop rsi
	pop rdi
bf_writeval_sz: equ $ - bf_writeval

bf_loopstart:
	; '[' : rbx is patched with the address following the matching ']'
	mov rbx, strict qword 0x1
	mov dl, [rsi]
	cmp dl, 0
	jnz .end
	jmp rbx
.end:
bf_loopstart_sz: equ $ - bf_loopstart

bf_loopend:
	; ']' : rbx is patched with the address of the matching '['
	mov rbx, strict qword 0x1
	mov dl, [rsi]
	cmp dl, 0
	jz .end
	jmp rbx
.end:
bf_loopend_sz: equ $ - bf_loopend

bf_exit:
	mov rax, SYS_EXIT
	xor rdi, rdi
	syscall
bf_exit_sz: equ $ - bf_exit

;-----------------------------------------------------------------------
; ELF64 file header + one program header (0x80 bytes in total).
; The 0xFF runs are placeholders filled in by _start.
;-----------------------------------------------------------------------
elf_head:
	; 0x00 e_ident
	dw 0x457f, 0x464c, 0x0102, 0x0001
	times 4 dw 0x0
	; 0x10 e_type, e_machine, e_version
	dw 0x0002, 0x003e, 0x0001, 0x0000
	; 0x18 e_entry = 0x400080
	dw 0x080, 0x040, 0x0, 0x0
	; 0x20 e_phoff = 0x40
	dw 0x0040
	times 3 dw 0x0
elf_section_head_offset :
	; 0x28 e_shoff
	times 8 db 0xFF
	; 0x30 e_flags, e_ehsize, e_phentsize, e_phnum,
	;      e_shentsize, e_shnum, e_shstrndx
	dw 0x0, 0x0, 0x40, 0x38, 0x1, 0x40, 0x3, 0x2
	; 0x40 program header : p_type = 1, p_flags = 5, p_offset = 0
	dw 0x1, 0x0, 0x5		; load in memory with RX perm
	times 5 dw 0x0
	; 0x50 p_vaddr, p_paddr
	dw 0x0, 0x40, 0, 0, 0, 0x40, 0, 0 ; load at 0x400000
	; 0x60 p_filesz, p_memsz
elf_prog_sz:
	times 16 db 0xFF		; 0x60 & 0x68
	; 0x70 p_align = 0x200000, then 8 bytes of padding
	dw 0x0, 0x20
	times 6 dw 0x0
elf_head_sz: equ $ - elf_head

; Section name string table. A string table starts with a NUL byte;
; with it, the sh_name values used below (0x1 and 0xb) point at
; ".shstrtab" and ".text" and the table is 0x11 bytes long.
elf_shstrtab:
	db 0x0, ".shstrtab", 0x0, ".text", 0x0
align 8, db 0
elf_shstrtab_sz: equ $ - elf_shstrtab

elf_section_headers:
	; head0 : null section, 0x40 zero bytes
	times 8 dq 0x0
	; head 1 : sh_name = 0xb, sh_type = 1, sh_flags = 6
	dw 0xb, 0, 0x1, 0, 0x6, 0, 0, 0
	; sh_addr = 0x400080, sh_offset = 0x80
	dw 0x80, 0x40, 0, 0, 0x80, 0, 0, 0
elf_section_text_sz:
	times 8 db 0xFF			; sh_size
	dq 0x0				; sh_link, sh_info
	dw 0x10				; sh_addralign, then sh_entsize
	times 7 dw 0x0
	; head2 : sh_name = 1, sh_type = 3, sh_flags = 0
	dw 0x1, 0, 0x3 , 0, 0, 0, 0, 0
	dq 0x0				; sh_addr
elf_section_strtab_off:
	times 8 db 0xFF			; sh_offset
	dw 0x11				; sh_size, then sh_link, sh_info
	times 7 dw 0x0
	dw 0x1				; sh_addralign, then sh_entsize
	times 7 dw 0x0
elf_section_headers_sz: equ $ - elf_section_headers

default_output: db "a.out", 0x0

miss_open: db "Missing opening '[' matching closing ']'"
miss_open_sz: equ $ - miss_open
chr_list : db ": ", 0xA, 0x0

read_error: db "Error reading file "
read_error_sz: equ $ - read_error

usage_err: db "Usage : [-o a.out] FILE.BF"
usage_err_sz: equ $ - usage_err

open_err: db "Error opening file", 0xa
open_err_sz: equ $ - open_err

section .text
global _start

;-----------------------------------------------------------------------
; Program entry point.
; Parses the command line, compiles the source file into the code map
; and writes the ELF file : header, code, section name table, padding
; up to a 16-byte boundary, section headers.
;	r12	initial rsp ([r12] = argc, [r12+8] = argv[0])
;	r13	heap start (two qwords, see the defines below)
; Exit status : 0 on success, 1 on usage or open error.
;-----------------------------------------------------------------------
_start:
	; using heap to store arguments
	%define bf_source [r13]
	%define elf_file [r13+0x8]
	%define heap_size 0x10

	; keep a stable pointer to argc/argv : the error paths below can
	; be reached after values have been pushed on the stack
	mov r12, rsp

	; heap init
	mov rax, SYS_BRK
	xor edi, edi
	syscall
	mov rdi, rax
	mov r13, rax			; heap start
	add rdi, heap_size
	mov rax, SYS_BRK
	syscall

	mov qword bf_source, 0		; "no source file seen yet"
	mov rax, default_output
	mov elf_file, rax

	; argument parsing
	mov rcx, [rsp]			; argc
	cmp rcx, 2
	jl .badarg
	cmp rcx, 4
	jg .badarg
	lea rsi, [rsp + 8]		; &argv[0]
	dec rcx				; rcx = arguments left to process
.argloop:
	add rsi, 8
	mov rdi, [rsi]
	cmp byte [rdi], 0x2d		; '-'
	jne .filearg
	cmp byte [rdi+1], 0
	je .filearg			; lone "-" : do not read past its end
	cmp byte [rdi+2], 0
	jne .filearg
	; arg is '-X' testing X
	mov al, [rdi+1]
	cmp al, 0x68			; '-h'
	je .badarg
	cmp al, 0x6f			; '-o'
	jne .badarg
	; -o storing file
	dec rcx
	jz .badarg			; '-o' was the last argument
	add rsi, 8
	mov rdi, [rsi]
	mov elf_file, rdi
	dec rcx
	jnz .argloop
	jmp .init
.filearg:
	mov rax, bf_source
	test rax, rax
	jnz .badarg			; file already given
	mov bf_source, rdi
	dec rcx
	jnz .argloop

.init:
	; code map init
	; rsi map size
	mov rsi, MAP_INC_SIZE
	call initmap

	mov rdi, bf_source		; from heap
	test rdi, rdi
	jz .badarg
	mov rax, SYS_OPEN
	xor esi, esi			; O_RDONLY
	xor edx, edx			; no mode
	syscall
	test rax, rax
	js .err_open

	push rax			; fd (rax is also compile_bf's argument)
	push r13			; heap
	call compile_bf
	pop r13				; heap
	pop rdi				; fd
	sub rax, r15
	push rax			; map len : size of the emitted code
	mov rax, SYS_CLOSE
	syscall

	; fill the ELF placeholders
	mov rax, [rsp]			; map len
	mov [elf_section_text_sz], rax
	add rax, elf_head_sz		; elf head + code
	mov [elf_prog_sz], rax		; p_filesz
	mov [elf_prog_sz + 8], rax	; p_memsz
	mov [elf_section_strtab_off], rax
	add rax, elf_shstrtab_sz	; section head offset
	; r14 = padding needed to put the section headers on 16 bytes
	mov r14, rax
	neg r14
	and r14, 0x0F
	add rax, r14
	mov [elf_section_head_offset], rax

	; writing elf file
	mov rax, SYS_OPEN
	mov rdi, elf_file
	mov rsi, 0x40 | 0x200 | 0x1	; O_CREAT | O_TRUNC | O_WRONLY
	mov rdx, 755o			; perm
	syscall
	test rax, rax
	js .err_open
	push rax			; [rsp] = fd, [rsp+8] = map len

	mov rax, SYS_WRITE
	mov rdi, [rsp]
	mov rsi, elf_head
	mov rdx, elf_head_sz
	syscall

	mov rax, SYS_WRITE
	mov rdi, [rsp]
	mov rsi, r15			; map_addr
	mov rdx, [rsp+8]		; map len
	syscall

	mov rax, SYS_WRITE
	mov rdi, [rsp]
	mov rsi, elf_shstrtab
	mov rdx, elf_shstrtab_sz
	syscall

	; padding : at most 15 bytes, taken from the all-zero null header
	test r14, r14
	jz .write_headers
	mov rax, SYS_WRITE
	mov rdi, [rsp]
	mov rsi, elf_section_headers
	mov rdx, r14
	syscall
.write_headers:
	mov rax, SYS_WRITE
	mov rdi, [rsp]
	mov rsi, elf_section_headers
	mov rdx, elf_section_headers_sz
	syscall

	pop rdi				; fd
	mov rax, SYS_CLOSE
	syscall

	mov rax, SYS_EXIT
	xor edi, edi
	syscall

.err_open:
	mov rax, SYS_WRITE
	mov rdi, 2			; stderr
	mov rsi, open_err
	mov rdx, open_err_sz
	syscall
	; falls through : the usage is printed after the error

.badarg:
	mov rax, SYS_WRITE
	mov rdi, 2			; stderr
	mov rsi, usage_err
	mov rdx, 8			; "Usage : "
	syscall

	mov rsi, [r12+8]		; argv[0]
	test rsi, rsi
	jz .usage_opts
	xor edx, edx			; rdx = strlen(argv[0])
.argv0len:
	cmp byte [rsi + rdx], 0
	je .argv0done
	inc rdx
	jmp .argv0len
.argv0done:
	mov rax, SYS_WRITE
	mov rdi, 2
	syscall

.usage_opts:
	mov rax, SYS_WRITE
	mov rdi, 2			; stderr
	mov rsi, usage_err + 7
	mov rdx, usage_err_sz - 7	; usage opts
	syscall

	mov rax, SYS_WRITE
	mov rdi, 2
	mov rsi, chr_list + 2		; \n
	mov rdx, 1
	syscall

	mov rax, SYS_EXIT
	mov rdi, 1
	syscall
%undef heap_size
%undef elf_file
%undef bf_source

;-----------------------------------------------------------------------
; Init a writable memory map
; In  : rsi = len
; Out : r14 = map size, r15 = map addr
; Clobbers rax, rcx, rdx, rdi, r8, r9, r10, r11.
; Exits with status 2 on failure.
;-----------------------------------------------------------------------
initmap:
	mov r14, rsi
	mov rax, SYS_MMAP
	xor edi, edi			; addr
	; rsi is len
	mov rdx, (0x1|0x2)		; PROT_READ | PROT_WRITE
	mov r10, (0x2 | 0x20)		; flags MAP_PRIVATE | MAP_ANONYMOUS
	mov r8, -1			; fd
	xor r9, r9
	syscall
	test rax, rax
	jle .err
	mov r15, rax
	ret
.err:
	mov rax, SYS_EXIT
	mov rdi, 0x2
	syscall

;-----------------------------------------------------------------------
; Grow the code map by MAP_INC_SIZE bytes.
; In  : r15 = map addr, r14 = map len
; Out : r15 = (possibly new) map addr, r14 = new len
; Clobbers rax, rcx, rdx, rsi, rdi, r10, r11.
; Exits with status 3 on failure.
;-----------------------------------------------------------------------
mremap:
	mov rax, SYS_MREMAP
	mov rdi, r15			; addr
	mov rsi, r14			; oldlen
	lea rdx, [rsi + MAP_INC_SIZE]	; newlen
	mov r10, MREMAP_MAYMOVE		; callers re-derive pointers from r15
	syscall
	test rax, rax
	jle .err
	mov r15, rax
	add r14, MAP_INC_SIZE
	ret
.err:
	mov rax, SYS_EXIT
	mov rdi, 0x3
	syscall

;-----------------------------------------------------------------------
; Brainfuck compiler. Reads bf code from fd one byte at a time and
; appends the matching machine code snippets to the code map.
; Identical consecutive instructions (except '[' and ']') are merged
; into a single snippet whose first immediate is the repeat count.
; Characters that are not instructions are echoed on stderr.
; Uses the heap (brk) for its variables; the stack holds one entry per
; currently open '['.
; In  :	rax = source fd
;	r14 = map size
;	r15 = map addr
; Out :	rax = map ptr (first free byte after the emitted code)
;	r15 = map addr
;	r14 = map size
; Clobbers rbx, rcx, rdx, rsi, rdi, r10, r11, r13.
; Exits with status 0x11 on read error or unmatched ']'.
;-----------------------------------------------------------------------
compile_bf:
	; Allocating growing heap to store various datas
	; heap start will be stored in r13
	%define fd [r13]
	%define map_ptr [r13+0x8]
	%define base_rsp [r13+0x10]
	%define chr_repeat [r13+0x18]
	%define line_count [r13+0x20]
	%define chr_count [r13+0x28]
	%define chr_buff_off 0x30
	%define chr_buff [r13+chr_buff_off]
	%define prev_chr_off 0x31
	%define prev_chr [r13+prev_chr_off]
	%define heap_size 0x32

	push rax			; source fd
	mov rax, SYS_BRK
	xor edi, edi
	syscall				; rax = current program break
	push rax			; heap start
	lea rdi, [rax + heap_size]	; new heap addr
	mov rax, SYS_BRK
	syscall
	pop r13				; heap start
	pop rax				; source fd
	mov fd, rax

	; init heap
	mov byte prev_chr, 0
	mov qword chr_count, 0
	mov qword line_count, 0
	mov qword chr_repeat, 1
	mov base_rsp, rsp		; save rsp in heap

	; copy code map header
	mov rdi, r15
	mov rsi, bf_start
	mov rdx, bf_start_sz
	call code_cpy
	mov map_ptr, rax		; new map ptr

	; read first char in prev_chr
	xor eax, eax			; read
	mov rdi, fd
	lea rsi, [r13 + prev_chr_off]
	mov rdx, 1			; read 1 byte
	syscall
	test rax, rax
	jle .read_error

.readloop:
	xor eax, eax			; read
	mov rdi, fd
	lea rsi, [r13 + chr_buff_off]	; buff byte
	mov rdx, 1			; read 1 byte
	syscall
	test rax, rax
	jz .endread
	js .read_error

	inc qword chr_count		; chr counter

	mov al, chr_buff
	; arg for loop is not a repeat counter
	cmp al, 0x5b			; '['
	je .cmpchar
	cmp al, 0x5d			; ']'
	je .cmpchar
	cmp al, prev_chr
	je .incnum			; same instruction, incrementing counter

.cmpchar:
	; Emit code for the PREVIOUS char; the current one becomes the
	; new previous.
	; note : chr_repeat has to be reset by .nxtinstr after jump
	mov rdi, map_ptr		; prepare to copy in code map
	xchg prev_chr, al

	cmp al, 0x3c			; '<'
	je .lptr
	cmp al, 0x3e			; '>'
	je .rptr
	cmp al, 0x2b			; '+'
	je .incval
	cmp al, 0x2d			; '-'
	je .decval
	cmp al, 0x2e			; '.'
	je .wrval
	cmp al, 0x2c			; ','
	je .rdval
	cmp al, 0x5b			; '['
	je .loopstart
	cmp al, 0x5d			; ']'
	je .loopend
	cmp al, 0x0a			; '\n'
	jne .echo
	; increment line counter in heap
	mov rbx, chr_repeat
	add line_count, rbx
.echo:
	; chr is not an instruction, printing it chr_repeat times
	; on stderr
	mov chr_buff, al
	mov rcx, chr_repeat
.errchr:
	push rcx			; syscall clobbers rcx
	mov rax, SYS_WRITE
	mov rdi, 2			; stderr
	lea rsi, [r13 + chr_buff_off]	; heap buff
	mov rdx, 1
	syscall
	pop rcx
	dec rcx
	jnz .errchr
	jmp .nxtinstr

	; following ref copy assume rdi to be map_ptr
.incval:
	mov rsi, bf_incval
	mov rdx, bf_incval_sz
	push rdx
	jmp .callcpy
.decval:
	mov rsi, bf_decval
	mov rdx, bf_decval_sz
	push rdx
	jmp .callcpy
.lptr:
	mov rsi, bf_decptr
	mov rdx, bf_decptr_sz
	push rdx
	jmp .callcpy
.rptr:
	mov rsi, bf_incptr
	mov rdx, bf_incptr_sz
	push rdx
	jmp .callcpy
.wrval:
	mov rsi, bf_writeval
	mov rdx, bf_writeval_sz
	push rdx
	jmp .callcpy
.rdval:
	mov rsi, bf_readval
	mov rdx, bf_readval_sz
	push rdx
	jmp .callcpy
.loopstart:
	; remember where this '[' lands (as an offset, the map may move)
	mov rbx, map_ptr
	sub rbx, r15
	push rbx			; loop offset from map start
	mov rsi, bf_loopstart
	mov rdx, bf_loopstart_sz
	push rdx
	jmp .callcpy
.loopend:
	cmp rsp, base_rsp
	je .loop_err_miss_open		; no '[' pending on the stack
	mov rsi, bf_loopend
	mov rdx, bf_loopend_sz
	call code_cpy
	mov map_ptr, rax		; rax is map_ptr
	pop rbx				; loop_start code offset
	; ']' jumps back to its '['
	lea rdi, [rax - bf_loopend_sz + 2] ; arg addr of the ']' snippet
	lea rcx, [rbx + ELF_CODE_OFFSET]
	mov [rdi], rcx
	; '[' jumps to the instruction following ']'
	sub rax, r15			; map offset of loop_end nxt instr
	add rax, ELF_CODE_OFFSET
	mov [r15 + rbx + 2], rax	; arg addr of the '[' snippet
	jmp .nxtinstr

.callcpy:
	call code_cpy
	mov map_ptr, rax
	; set the 1st instr arg in the mapping
	pop rdx				; snippet size
	sub rax, rdx
	add rax, 2			; arg addr in code map
	mov rbx, chr_repeat
	mov [rax], rbx

.nxtinstr:
	; reinit chr_repeat
	mov qword chr_repeat, 1
	jmp .readloop

.incnum:
	; same instruction found incrementing chr_repeat
	mov rbx, chr_repeat
	add rbx, 1			; add, not inc : inc leaves CF untouched
	jc .incoverflow
	mov chr_repeat, rbx
	jmp .readloop
.incoverflow:
	; counter is full : emit what has been counted so far
	dec rbx
	mov chr_repeat, rbx
	jmp .cmpchar

.loop_err_miss_open:
	; miss_open_err
	mov rax, SYS_WRITE
	mov rdi, 2			; stderr
	mov rsi, miss_open
	mov rdx, miss_open_sz
	syscall
	jmp .exit_error

.read_error:
	mov rax, SYS_WRITE
	mov rdi, 2			; stderr
	mov rsi, read_error
	mov rdx, read_error_sz
	syscall

.exit_error:
	mov rax, SYS_WRITE
	mov rdi, 2
	mov rsi, chr_list + 2		; \n
	mov rdx, 1
	syscall

	mov rax, SYS_EXIT
	mov rdi, 0x11
	syscall

.endread:
	; EOF reached
	; fake \0 read to process prev_chr
	mov byte chr_buff, 0
	xor eax, eax			; al = 0 becomes the new prev_chr
	mov bl, prev_chr
	test bl, bl
	jnz .cmpchar
	; prevchar is 0 copying exit in code map
.end_compile:
	mov rdi, map_ptr
	mov rsi, bf_exit
	mov rdx, bf_exit_sz
	call code_cpy
	mov map_ptr, rax
	; restoring stack
	mov rsp, base_rsp
	push qword map_ptr
	; restore heap
	mov rax, SYS_BRK
	mov rdi, r13
	syscall
	pop rax				; return map_ptr
	ret
%undef fd
%undef map_ptr
%undef base_rsp
%undef chr_buff
%undef prev_chr
%undef heap_size

;-----------------------------------------------------------------------
; Copy bf code from data to map, growing the map first when the copy
; would not fit in the tracked map size.
; In  :	r15 = map start
;	r14 = map size
;	rdi = map ptr (where to write)
;	rsi = code ptr
;	rdx = code size in bytes (non zero)
; Out :	rax = new map ptr (first byte after the copy)
;	r14, r15 updated if the map was resized
; Clobbers rcx, rdx, rsi, rdi, r10, r11.
;-----------------------------------------------------------------------
code_cpy:
	push rdx			; code size
	mov rax, rdi
	sub rax, r15			; used len in map
	push rax
	add rax, rdx			; rax is future len (after copy)
	cmp rax, r14
	jbe .copy
	; resize
	push rsi			; save code_ptr
	call mremap
	pop rsi
.copy:
	; rebuild the destination from r15 : the map may have moved
	pop rdi				; used len
	add rdi, r15			; new map ptr
	pop rcx				; size in bytes to write
	cld				; clear DF
	rep movsb
	mov rax, rdi
	ret