Brainfuck compiler for linux x86_64 written in nasm x86_64
x86-64
nasm
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

bfc.asm 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660
  1. ; bfc : a brainfuck compiler & interpreter
  2. ; Copyright (C) 2018 Weber Yann
  3. ;
  4. ; This program is free software; you can redistribute it and/or modify
  5. ; it under the terms of the GNU General Public License as published by
  6. ; the Free Software Foundation; either version 3 of the License, or
  7. ; any later version.
  8. ;
  9. ; This program is distributed in the hope that it will be useful,
  10. ; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. ; GNU General Public License for more details.
  13. ;
  14. ; You should have received a copy of the GNU General Public License
  15. ; along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. ;
  ; A brainfuck compiler & interpreter:
  ; Build : nasm -felf64 bfc.asm && ld -s -melf_x86_64 bfc.o -o bfc
  ;
  ; ./bfc [-h] [-e [-o a.out]] FILE.bf
  ; Options :
  ; -h print usage and exit
  ; -e tell bfc to produce an ELF file
  ; -o with -e, name of the file to create
  ; FILE.bf the brainfuck source file to compile
  ; NOTE(review): the _start routine in this file only checks for exactly one
  ; argument and opens it as FILE.bf; -e / -o handling is not visible here.
  ; Assembler setup: 64-bit code, NASM syntax. The smartalign package makes
  ; ALIGN pad with multi-byte NOPs, tuned here for the k8 family.
  [bits 64]
  %use smartalign
  ALIGNMODE k8
  ; NOTE(review): no visible code reads these two literals - the snippets use
  ; the bf_start.lbl_incresize / bf_start.lbl_decresize equates instead.
  ; Confirm before relying on (or removing) them.
  %define lbl_incresize 0x2e
  %define lbl_decresize 0x50
  ; Growth step used when a mapping is resized, and its low-bit mask.
  %define MAP_INC_SIZE 0x1000
  %define MAP_INC_MASK 0x0FFF
  ; Initial size in bytes of the brainfuck tape mapping.
  %define BFMEM_INIT_SZ MAP_INC_SIZE
  section .data
  ;-----------------------------------------------------------------------
  ; bf_start - runtime header copied at the very beginning of the JIT map.
  ;
  ; It is data for the compiler (hence section .data) and code for the
  ; generated program, so it only uses relative jumps/calls.
  ;
  ; Runtime register roles (set up by .start, live in every snippet):
  ;   r15 = base address of the tape mapping
  ;   r14 = length of the tape mapping in bytes
  ;   rsi = current tape pointer
  ;
  ; Entry points reachable from snippets through "map base + offset":
  ;   bf_start.lbl_incresize  -> .incresize
  ;   bf_start.lbl_decresize  -> .decresize
  ;-----------------------------------------------------------------------
  bf_start:
          jmp     .start

  ; .mremap - grow the tape mapping.
  ; In:    rbx = minimum number of extra bytes required
  ; Out:   r15 = tape base (the kernel may have moved the mapping)
  ;        r14 = new tape length
  ;        rsi = tape pointer rebased on the new r15
  ;        rbx = number of bytes actually added (multiple of MAP_INC_SIZE)
  ; Clobb: rax, rcx, rdx, rdi, r10, r11, flags
  ; Exits with status 2 on overflow or if the kernel refuses.
  .mremap:
          add     rbx, MAP_INC_SIZE - 1   ; round the request up to a
          jc      .erremap                ; multiple of MAP_INC_SIZE
          and     rbx, -MAP_INC_SIZE
          jnz     .remap_cont
          mov     rbx, MAP_INC_SIZE       ; always grow by at least one step
  .remap_cont:
          mov     rdx, r14
          add     rdx, rbx                ; rdx = newlen
          jc      .erremap                ; overflow, too much mem
          sub     rsi, r15                ; keep the tape pointer as an offset
          push    rsi                     ; rsi is needed for the oldlen arg
          mov     eax, 0x19               ; mremap
          mov     rdi, r15                ; addr
          mov     rsi, r14                ; oldlen (length before growing)
          mov     r10d, 1                 ; MREMAP_MAYMOVE
          syscall                         ; rdx/rbx survive, rcx/r11 do not
          pop     rsi
          cmp     rax, -4095              ; [-4095, -1] is -errno
          jae     .erremap
          mov     r15, rax
          mov     r14, rdx
          add     rsi, r15                ; offset -> pointer in the new map
          ret
  .erremap:
          mov     eax, 0x3c               ; exit(2)
          mov     edi, 0x2
          syscall

  align 8
  .lbl_incresize: equ $ - bf_start
  ; .incresize - make sure the tape pointer can advance by rbx cells.
  ; In:    rbx = number of cells the caller is about to add to rsi
  ; Out:   r14/r15/rsi updated so that rsi + rbx lies inside the tape
  ; Clobb: rax, rbx, rcx, rdx, rdi, r10, r11, flags
  .incresize:
          mov     rax, rsi
          sub     rax, r15                ; current offset
          add     rax, rbx                ; offset after the move
          jc      .erremap
          sub     rax, r14
          jb      .incresize_done         ; target is already mapped
          lea     rbx, [rax + 1]          ; bytes missing past the end
          call    .mremap
  .incresize_done:
          ret

  align 8
  .lbl_decresize: equ $ - bf_start
  ; .decresize - make room on the left of the tape.
  ; In:    rbx = number of cells the caller is about to subtract from rsi
  ; Out:   the tape has grown, its old content is shifted towards higher
  ;        addresses, the freed front bytes are zero and rsi is adjusted so
  ;        that rsi - rbx(in) is not below r15
  ; Clobb: rax, rbx, rcx, rdx, rdi, r10, r11, flags (DF is left cleared)
  .decresize:
          mov     rax, rsi
          sub     rax, r15                ; current offset
          cmp     rax, rbx
          jae     .decresize_done         ; enough cells on the left already
          sub     rbx, rax                ; rbx = cells missing in front
          push    r14                     ; old length = bytes to shift
          call    .mremap                 ; rbx = bytes added, rsi rebased
          pop     rcx                     ; old length
          push    rsi
          ; source and destination overlap with dst > src: copy backwards
          lea     rsi, [r15 + rcx - 1]    ; last byte of the old content
          lea     rdi, [rsi + rbx]        ; its new place
          std
          rep movsb                       ; leaves rdi = r15 + rbx - 1
          mov     rcx, rbx
          xor     eax, eax
          rep stosb                       ; zero the front, still backwards
          cld
          pop     rsi
          add     rsi, rbx                ; same cell, shifted with the data
  .decresize_done:
          ret

  .errmap:
          mov     eax, 0x3c               ; exit(1)
          mov     edi, 0x1
          syscall

  .start:
          ; map the initial tape
          mov     eax, 0x9                ; mmap
          xor     edi, edi                ; addr: kernel's choice
          mov     esi, BFMEM_INIT_SZ      ; len
          mov     r14, rsi
          mov     edx, (0x1|0x2)          ; PROT_READ | PROT_WRITE
          mov     r10d, (0x2 | 0x20)      ; MAP_PRIVATE | MAP_ANONYMOUS
          mov     r8, -1                  ; fd
          xor     r9d, r9d                ; offset
          syscall
          cmp     rax, -4095              ; [-4095, -1] is -errno
          jae     .errmap
          mov     r15, rax
          lea     rsi, [rax + (BFMEM_INIT_SZ / 2)] ; BF ptr starts mid-tape
  align 8
  bf_start_sz: equ $ - bf_start
  ; In the code snippets below, a call into the runtime header is made by
  ; adding an offset to the JIT map base address.
  ; This base address has to be on top of the stack
  ; when one of these snippets starts executing.
  ;
  ; The first instruction of a patched snippet has to be a mov of a
  ; 64-bit immediate into a register. The compiler overwrites that
  ; immediate (at byte offset 2 of the snippet) to "pass" a parameter.
  ; bf_decptr - snippet for '<' : move the tape pointer left.
  ; The immediate of the first instruction is patched with the repeat count.
  ; Uses: rsi = tape pointer, r15 = tape base, [rsp] = JIT map base on entry
  ; Clobb: rax, rbx, flags (plus whatever the resize helper clobbers)
  bf_decptr:
          mov     rbx, strict qword 0x1   ; patched: cells to move left
          push    rbx                     ; the helper may clobber rbx
          mov     rax, rsi
          sub     rax, r15                ; offset of the pointer in the tape
          cmp     rax, rbx                ; compare offsets, not addresses
          jae     .end                    ; enough cells on the left
          mov     rax, [rsp+8]            ; JIT map base (below our push)
          add     rax, bf_start.lbl_decresize
          call    rax
  .end:
          pop     rbx
          sub     rsi, rbx
  bf_decptr_sz: equ $ - bf_decptr
  ; bf_incptr - snippet for '>' : move the tape pointer right.
  ; The immediate of the first instruction is patched with the repeat count.
  ; Uses: rsi = tape pointer, r15 = tape base, r14 = tape length,
  ;       [rsp] = JIT map base on entry
  ; Clobb: rax, rbx, flags (plus whatever the resize helper clobbers)
  bf_incptr:
          mov     rbx, strict qword 0x1   ; patched: cells to move right
          push    rbx                     ; the helper may clobber rbx
          mov     rax, rsi
          sub     rax, r15                ; current offset
          add     rax, rbx                ; offset after the whole move
          jc      .grow
          cmp     rax, r14
          jb      .end                    ; destination is inside the tape
  .grow:
          mov     rax, [rsp+8]            ; JIT map base (below our push)
          add     rax, bf_start.lbl_incresize
          call    rax
  .end:
          pop     rbx
          add     rsi, rbx
  bf_incptr_sz: equ $ - bf_incptr
  ; bf_incval - snippet for '+' : add the repeat count to the current cell.
  ; Only the low byte of the count matters, cells wrap modulo 256.
  bf_incval:
          mov     rbx, strict qword 0x1   ; patched: repeat count
          add     byte [rsi], bl
  bf_incval_sz: equ $ - bf_incval
  ; bf_decval - snippet for '-' : subtract the repeat count from the
  ; current cell. Only the low byte of the count matters (modulo 256).
  bf_decval:
          mov     rbx, strict qword 0x1   ; patched: repeat count
          sub     byte [rsi], bl
  bf_decval_sz: equ $ - bf_decval
  ; bf_readval - snippet for ',' : read one byte from stdin into the
  ; current cell, repeated "count" times (the last byte read wins).
  ; Every read targets the current cell only, neighbours are untouched.
  ; On end of file the cell is set to 0; on a read error it is left as is.
  ; Clobb: rax, rbx, rcx, rdx, rdi, r11, flags
  bf_readval:
          mov     rbx, strict qword 0x1   ; patched: repeat count
  .next:
          xor     eax, eax                ; read
          xor     edi, edi                ; stdin
          mov     edx, 1                  ; exactly one cell
          syscall                         ; rsi (buffer) is preserved
          test    rax, rax
          jnz     .end
          mov     byte [rsi], 0           ; EOF
  .end:
          dec     rbx
          jnz     .next
  bf_readval_sz: equ $ - bf_readval
  ; bf_writeval - snippet for '.' : write the current cell to stdout,
  ; repeated "count" times. The same cell is written each time.
  ; Write errors are ignored.
  ; Clobb: rax, rbx, rcx, rdx, rdi, r11, flags
  bf_writeval:
          mov     rbx, strict qword 0x1   ; patched: repeat count
  .next:
          mov     eax, 1                  ; write
          mov     edi, eax                ; stdout
          mov     edx, eax                ; one byte
          syscall                         ; rsi (buffer) is preserved
          dec     rbx
          jnz     .next
  bf_writeval_sz: equ $ - bf_writeval
  ; bf_loopstart - snippet for '[' : leave the loop when the current cell
  ; is zero. The immediate is patched with an absolute address in the JIT
  ; map once the matching ']' has been emitted.
  bf_loopstart:
          mov     rbx, strict qword 0x1   ; patched: address to jump to
          cmp     byte [rsi], 0
          jne     .end                    ; non-zero cell: run the body
          jmp     rbx
  .end:
  bf_loopstart_sz: equ $ - bf_loopstart
  ; bf_loopend - snippet for ']' : go back to the matching '[' while the
  ; current cell is not zero. The immediate is patched with the absolute
  ; address of that '[' snippet.
  bf_loopend:
          mov     rbx, strict qword 0x1   ; patched: address to jump to
          cmp     byte [rsi], 0
          je      .end                    ; zero cell: fall out of the loop
          jmp     rbx
  .end:
  bf_loopend_sz: equ $ - bf_loopend
  ; bf_exit - trailer snippet: terminate the generated program with
  ; status 0. Copied as is, it has no patched operand.
  bf_exit:
          xor     edi, edi                ; status 0
          mov     eax, 0x3c               ; exit
          syscall
  bf_exit_sz: equ $ - bf_exit
  ; Diagnostic messages written to stderr. None of them is NUL terminated:
  ; the *_sz equates give the length passed to write.
  miss_open: db "Missing opening '[' matching closing ']'"
  miss_open_sz: equ $ - miss_open
  ; ": " followed by a newline; chr_list + 2 is used as a one-byte "\n"
  chr_list : db ": ", 0xA
  read_error: db "Error reading file "
  read_error_sz: equ $ - read_error
  ; Printed in two parts around argv[0]: the first 8 bytes ("Usage : "),
  ; then from offset 7 (" FILE.BF")
  usage_err: db "Usage : FILE.BF"
  usage_err_sz: equ $ - usage_err
  open_err: db "Error opening file", 0xa
  open_err_sz: equ $ - open_err
  section .bss
  ; NOTE(review): no visible code references read_buff - confirm before
  ; removing it.
  read_buff: resb 128
  section .text
  global _start
  ;-----------------------------------------------------------------------
  ; _start - program entry point (Linux x86-64, raw syscalls, no libc).
  ; Stack on entry: [rsp] = argc, [rsp+8] = argv[0], [rsp+16] = argv[1].
  ;
  ; Expects exactly one argument, the brainfuck source file. Compiles it
  ; into an anonymous mapping, makes the mapping executable and jumps into
  ; it with the map base on top of the stack.
  ; Exit status: 1 on usage/open error, 4 if the map cannot be made
  ; executable; other values come from the helpers or the compiled program.
  ;-----------------------------------------------------------------------
  _start:
          mov     rcx, [rsp]              ; argc
          cmp     rcx, 2
          jne     .badarg
          ; JIT code map init, requested size in rsi
          mov     esi, 0x10
          call    initmap
          mov     eax, 0x2                ; open
          mov     rdi, [rsp+16]           ; argv[1]
          xor     esi, esi                ; flags: O_RDONLY
          xor     edx, edx                ; mode: unused without O_CREAT
          syscall
          test    rax, rax
          js      .err_open
          call    compile_bf              ; rax = source fd
          ; set code map perm
          mov     eax, 0xA                ; mprotect
          mov     rdi, r15
          mov     rsi, r14
          mov     edx, 0x4 | 0x1          ; PROT_EXEC | PROT_READ
          syscall
          test    rax, rax
          jnz     .err_protect            ; never jump into a non-exec map
          push    r15                     ; snippets expect the map base here
          jmp     r15
  .err_protect:
          mov     eax, 0x3c               ; exit(4)
          mov     edi, 0x4
          syscall
  .err_open:
          mov     eax, 1                  ; write
          mov     edi, 2                  ; stderr
          mov     rsi, open_err
          mov     edx, open_err_sz
          syscall
          ; fall through: the usage line is printed after the open error
  .badarg:
          mov     eax, 1                  ; write
          mov     edi, 2                  ; stderr
          mov     rsi, usage_err
          mov     edx, 8                  ; "Usage : "
          syscall
          mov     rsi, [rsp+8]            ; argv[0]
          test    rsi, rsi
          jz      .usage_tail             ; argc == 0: no program name
          xor     edx, edx                ; rdx = strlen(argv[0])
  .argv0len:
          cmp     byte [rsi+rdx], 0
          je      .argv0write
          inc     rdx
          jmp     .argv0len
  .argv0write:
          mov     eax, 1
          mov     edi, 2
          syscall                         ; rsi = argv[0], rdx = its length
  .usage_tail:
          mov     eax, 1                  ; write
          mov     edi, 2                  ; stderr
          mov     rsi, usage_err + 7
          mov     edx, usage_err_sz - 7   ; usage opts
          syscall
          mov     eax, 1
          mov     edi, 2
          mov     rsi, chr_list + 2       ; \n
          mov     edx, 1
          syscall
          mov     eax, 0x3c               ; exit
          mov     edi, 1
          syscall
  ;-----------------------------------------------------------------------
  ; initmap - create an anonymous, private, read/write mapping.
  ; In:    rsi = requested length in bytes
  ; Out:   r14 = requested length, r15 = mapping address
  ; Clobb: rax, rcx, rdx, rdi, r8, r9, r10, r11, flags
  ; Exits with status 2 if mmap does not return a positive address.
  ;-----------------------------------------------------------------------
  initmap:
          mov     r14, rsi                ; remember the size for the caller
          xor     edi, edi                ; addr: kernel's choice
          mov     edx, (0x1|0x2)          ; PROT_READ | PROT_WRITE
          mov     r10d, (0x2 | 0x20)      ; MAP_PRIVATE | MAP_ANONYMOUS
          mov     r8, -1                  ; fd
          xor     r9d, r9d                ; offset
          mov     eax, 0x9                ; mmap, len is still in rsi
          syscall
          test    rax, rax
          jle     .err
          mov     r15, rax
          ret
  .err:
          mov     edi, 0x2
          mov     eax, 0x3c               ; exit(2)
          syscall
  ;-----------------------------------------------------------------------
  ; mremap - grow the code map by MAP_INC_SIZE bytes.
  ; In:    r15 = map address, r14 = current length
  ; Out:   r15 = address returned by the kernel, r14 = r14 + MAP_INC_SIZE
  ; Clobb: rax, rcx, rdx, rsi, rdi, r10, r11, flags
  ; Exits with status 3 if the kernel does not return a positive address.
  ;
  ; The flags argument is 0, so the kernel may only grow the map in place.
  ; NOTE(review): compile_bf stores absolute map addresses inside the loop
  ; snippets, which presumably is why the map must not move - confirm
  ; before adding MREMAP_MAYMOVE here.
  ;-----------------------------------------------------------------------
  mremap:
          mov     rdi, r15                ; addr
          mov     rsi, r14                ; oldlen
          lea     rdx, [rsi + MAP_INC_SIZE] ; newlen
          xor     r10d, r10d              ; flags = 0
          mov     eax, 0x19               ; mremap
          syscall
          test    rax, rax
          jle     .err
          add     r14, MAP_INC_SIZE
          mov     r15, rax
          ret
  .err:
          mov     edi, 0x3
          mov     eax, 0x3c               ; exit(3)
          syscall
  ;-----------------------------------------------------------------------
  ; compile_bf - JIT brainfuck compiler.
  ; Reads the source one byte at a time from a file descriptor and appends
  ; the matching machine-code snippets to the code map. Runs of the same
  ; instruction are folded into one snippet whose first immediate is
  ; patched with the run length ('[' and ']' are never folded).
  ; Bytes that are not brainfuck instructions are echoed on stderr.
  ; The map is still read/write on return: the caller changes its
  ; protection.
  ;
  ; In:    rax = source fd (closed before returning)
  ;        r14 = code map size, r15 = code map address
  ; Out:   rax = code map address (r15); r14/r15 updated if the map grew
  ; Clobb: rbx, rcx, rdx, rsi, rdi, r10, r11, r13, flags
  ; Exits with status 0x11 on read error, heap error or unbalanced
  ; brackets.
  ;
  ; The machine stack holds one entry per currently open '[' : the address
  ; of its snippet in the code map. Scratch state lives in a small area
  ; obtained with brk, addressed through r13.
  ;-----------------------------------------------------------------------
  section .rodata
  miss_close: db "Missing closing ']' matching opening '['"
  miss_close_sz: equ $ - miss_close
  section .text
  compile_bf:
  %define fd [r13]
  %define map_ptr [r13+0x8]
  %define base_rsp [r13+0x10]
  %define chr_repeat [r13+0x18]
  %define line_count [r13+0x20]
  %define chr_count [r13+0x28]
  %define chr_buff_off 0x30
  %define chr_buff [r13+chr_buff_off]
  %define prev_chr_off 0x31
  %define prev_chr [r13+prev_chr_off]
  %define heap_size 0x32
          push    rax                     ; source fd
          mov     eax, 0xc                ; brk(0): current program break
          xor     edi, edi
          syscall
          push    rax                     ; heap start
          lea     rdi, [rax + heap_size]  ; new program break
          mov     eax, 0xc                ; brk
          syscall
          cmp     rax, rdi                ; brk returns the resulting break
          jb      .exit_error             ; heap could not be extended
          pop     r13                     ; heap start
          pop     rax                     ; source fd
          mov     fd, rax
          ; init heap
          mov     byte prev_chr, 0
          mov     qword chr_count, 0
          mov     qword line_count, 0
          mov     qword chr_repeat, 1
          mov     base_rsp, rsp           ; rsp with no open '['
          ; copy code map header
          mov     rdi, r15
          mov     rsi, bf_start
          mov     rdx, bf_start_sz
          call    code_cpy
          mov     map_ptr, rax            ; new map ptr
          ; read first char in prev_chr
          xor     eax, eax                ; read
          mov     rdi, fd
          lea     rsi, prev_chr
          mov     edx, 1
          syscall
          test    rax, rax
          jz      .end_compile            ; empty source: only emit the exit
          js      .read_error

  ; Invariant: prev_chr is the pending character, chr_repeat its run
  ; length. A character is emitted when a different one (or EOF) follows.
  .readloop:
          xor     eax, eax                ; read
          mov     rdi, fd
          lea     rsi, chr_buff
          mov     edx, 1
          syscall
          test    rax, rax
          jz      .endread
          js      .read_error
          inc     qword chr_count
          mov     al, chr_buff
          ; arg for loop is not a repeat counter: brackets always flush
          cmp     al, '['
          je      .cmpchar
          cmp     al, ']'
          je      .cmpchar
          cmp     al, prev_chr
          je      .incnum                 ; same instruction, count it

  ; Emit the pending character. In: al = character that replaces it.
  .cmpchar:
          mov     rdi, map_ptr            ; prepare to copy in code map
          ; al becomes the character to emit, prev_chr the new pending one
          ; note : chr_repeat is reset by .nxtinstr
          xchg    prev_chr, al
          cmp     al, '<'
          je      .lptr
          cmp     al, '>'
          je      .rptr
          cmp     al, '+'
          je      .incval
          cmp     al, '-'
          je      .decval
          cmp     al, '.'
          je      .wrval
          cmp     al, ','
          je      .rdval
          cmp     al, '['
          je      .loopstart
          cmp     al, ']'
          je      .loopend
          cmp     al, 0x0a                ; '\n'
          jne     .echo
  .line:  ; increment line counter in heap
          mov     rdx, chr_repeat
          add     line_count, rdx
  .echo:
          ; chr is not an instruction, printing it on stderr.
          ; chr_buff is free here: its content already moved to prev_chr
          mov     chr_buff, al
          mov     rcx, chr_repeat
  .errchr:
          push    rcx                     ; syscall clobbers rcx
          mov     eax, 1                  ; write
          mov     edi, 2                  ; stderr
          lea     rsi, chr_buff
          mov     edx, eax                ; sz 1
          syscall
          pop     rcx
          dec     rcx
          jnz     .errchr
          jmp     .nxtinstr

  ; following ref copy assume rdi to be map_ptr
  .incval:
          mov     rsi, bf_incval
          mov     rdx, bf_incval_sz
          jmp     .callcpy
  .decval:
          mov     rsi, bf_decval
          mov     rdx, bf_decval_sz
          jmp     .callcpy
  .lptr:
          mov     rsi, bf_decptr
          mov     rdx, bf_decptr_sz
          jmp     .callcpy
  .rptr:
          mov     rsi, bf_incptr
          mov     rdx, bf_incptr_sz
          jmp     .callcpy
  .wrval:
          mov     rsi, bf_writeval
          mov     rdx, bf_writeval_sz
          jmp     .callcpy
  .rdval:
          mov     rsi, bf_readval
          mov     rdx, bf_readval_sz
          jmp     .callcpy
  .loopstart:
          push    qword map_ptr           ; address this '[' snippet gets
          mov     rsi, bf_loopstart
          mov     rdx, bf_loopstart_sz
          jmp     .callcpy                ; its operand is patched by ']'
  .loopend:
          cmp     rsp, base_rsp
          je      .loop_err_miss_open     ; no '[' is open
          mov     rsi, bf_loopend
          mov     rdx, bf_loopend_sz
          push    rdx
          call    code_cpy
          mov     map_ptr, rax
          pop     rdx
          sub     rax, rdx                ; rax = start of this ']' snippet
          pop     rbx                     ; matching '[' snippet
          mov     [rax+2], rbx            ; loop end jumps to loop start
          mov     rax, map_ptr
          mov     [rbx+2], rax            ; loop start jumps past loop end
          jmp     .nxtinstr

  ; Copy a snippet and patch its first immediate with the run length.
  ; In: rdi = map_ptr, rsi = snippet, rdx = snippet size
  .callcpy:
          push    rdx
          call    code_cpy
          mov     map_ptr, rax
          pop     rdx
          sub     rax, rdx                ; rax = start of the copied snippet
          mov     rbx, chr_repeat
          mov     [rax+2], rbx            ; operand of the leading mov
  .nxtinstr:
          ; reinit chr_repeat
          mov     qword chr_repeat, 1
          jmp     .readloop

  .incnum:
          ; same instruction found, incrementing chr_repeat.
          ; add (not inc) so that CF reports a wrapped counter
          mov     rbx, chr_repeat
          add     rbx, 1
          jc      .cmpchar                ; counter full: flush, al = chr
          mov     chr_repeat, rbx
          jmp     .readloop

  .endread: ; EOF reached
          ; fake \0 read to process prev_chr
          mov     byte chr_buff, 0
          cmp     byte prev_chr, 0
          je      .end_compile
          xor     eax, eax                ; al = 0 becomes the pending chr
          jmp     .cmpchar

  ; nothing pending any more, copying exit in code map
  .end_compile:
          cmp     rsp, base_rsp
          jne     .loop_err_miss_close    ; a '[' was never closed
          mov     rdi, map_ptr
          mov     rsi, bf_exit
          mov     rdx, bf_exit_sz
          call    code_cpy
          ; restoring stack
          mov     rsp, base_rsp
          mov     eax, 0x3                ; close the source file
          mov     rdi, fd
          syscall
          ; restore heap
          mov     eax, 0xc                ; brk
          mov     rdi, r13
          syscall
          mov     rax, r15                ; documented return value
          ret

  .loop_err_miss_open:
          mov     eax, 1                  ; write
          mov     edi, 2                  ; stderr
          mov     rsi, miss_open
          mov     edx, miss_open_sz
          syscall
          jmp     .exit_error
  .loop_err_miss_close:
          mov     eax, 1                  ; write
          mov     edi, 2                  ; stderr
          mov     rsi, miss_close
          mov     edx, miss_close_sz
          syscall
          jmp     .exit_error
  .read_error:
          mov     eax, 1                  ; write
          mov     edi, 2                  ; stderr
          mov     rsi, read_error
          mov     edx, read_error_sz
          syscall
  .exit_error:
          mov     eax, 1                  ; end the message line
          mov     edi, 2
          mov     rsi, chr_list + 2       ; \n
          mov     edx, 1
          syscall
          mov     eax, 0x3c               ; exit(0x11)
          mov     edi, 0x11
          syscall
  %undef fd
  %undef map_ptr
  %undef base_rsp
  %undef chr_repeat
  %undef line_count
  %undef chr_count
  %undef chr_buff_off
  %undef chr_buff
  %undef prev_chr_off
  %undef prev_chr
  %undef heap_size
  ;-----------------------------------------------------------------------
  ; code_cpy - append a piece of code to the code map, growing the map
  ; first when the piece does not fit.
  ; In:    r15 = map start, r14 = map size
  ;        rdi = map ptr (where to write, inside the map)
  ;        rsi = code ptr
  ;        rdx = code size in bytes (0 copies nothing)
  ; Out:   rax = new map ptr (first byte after the copy)
  ;        r14/r15 updated if the map was resized
  ; Clobb: rcx, rdx, rsi, rdi, r10, r11, flags (DF is left cleared)
  ;-----------------------------------------------------------------------
  code_cpy:
          push    rdx                     ; [rsp+8] = size
          mov     rax, rdi
          sub     rax, r15
          push    rax                     ; [rsp]   = used len in map
  .fit:
          mov     rax, [rsp]
          add     rax, [rsp+8]            ; len of the map after the copy
          cmp     rax, r14
          jbe     .copy
          ; resize, one increment at a time until the code fits
          push    rsi                     ; mremap clobbers rsi
          call    mremap
          pop     rsi
          jmp     .fit
  .copy:
          pop     rax                     ; used len
          lea     rdi, [r15 + rax]        ; r15 may have been updated
          pop     rcx                     ; size in bytes to write
          cld                             ; clear DF
          rep movsb
          mov     rax, rdi
  .ret:
          ret