lang-bootstrap/03/in02

1093 lines
8.1 KiB
Text
Raw Normal View History

2021-11-10 21:22:25 -05:00
// open input file
// open(IF, flags=0). the returned descriptor is not saved or checked;
// the read loop below hard-codes fd 3 for the input file.
im
--IF
JA
zA
IA
im
##2.
sy
// open output file
// open(OF, 0x241, 0x1ed). on Linux x86-64, 0x241 is O_WRONLY|O_CREAT|O_TRUNC and 0x1ed is mode 0755.
// again the descriptor is not saved; later code hard-codes fd 4 for the output file.
im
--OF
JA
im
##241.
IA
im
##1ed.
DA
im
##2.
sy
// write ELF header
// the header is copied out of this running program's own image: 0x78 bytes starting at 0x400000.
im
##4.
JA output fd
im
##400000. address of ELF header in this executable
IA
im
##78. length
DA
im
##1. write
sy
2021-11-11 12:54:20 -05:00
// read next line
// reads the input (fd 3) one byte at a time into ::LI, dropping spaces and tabs,
// until a newline has been stored; then dispatches on the first character of the line.
// there is no check that the line fits in ::LI.
::rl
im
--LI
RA rbp pointer to line buffer
::rL read loop
im
##3. input file descriptor
JA
IR where to read into
im
##1.
DA read 1 byte
im
##0. syscall 0 (read)
sy
// check how many bytes were read
BA
im
##1.
jg if 1 greater than number of bytes read
:-ef end of file
BR
DR pointer to character we just read
zA
lb
BA
im
##20. ' '
je
:-rL ignore spaces
im
##9. '\t'
je
:-rL ignore tabs
// the character is kept: advance rbp past it
BD
im
##1.
+B
RA increment pointer
// reload the character (rdx still points at it) and stop once it is a newline
BD
zA
lb
BA
im
##a.
jn
:-rL keep looping
// we now have a full line from the file in ::LI
// the pointer to the end of the line is in rbp
// look at the first character
im
--LI
BA
zA
lb
BA
im
##3b. ascii ';'
je if it's a comment,
:-rl jump back to read the next line
im
##a. ascii '\n'
je if it's a blank line,
:-rl jump back to read the next line
im
##3a. ascii ':'
je
:-ld label definition
im
##3f. ascii '?'
je
:-?j
// anything else is not handled yet: exit, with rax still holding 0x3f from the last comparison
jm
:-ex
2021-11-11 12:54:20 -05:00
// label definition
// reached from the line dispatcher when the line in ::LI starts with ':'.
// on the first pass, appends one entry to the label list at *L$:
//   the line's text, copied from the start of ::LI up to and including the newline,
//   followed by the 4-byte address the label refers to.
// then updates L$ to point past the new entry. on the second pass it does nothing.
// there is no check that the entry fits in the space reserved at ::LB.
::ld
// first, check if we're on the second pass.
im
--2P
BA
zA
lb
BA
zA
jn if on second pass,
:-rl ignore this (read next line)
// first get current address
im
##4. output fd
JA
zA
IA offset = 0
im
##1. whence = SEEK_CUR
DA
im
##8. syscall 8 = lseek
sy
BA
im
##400000. address of start of file
+B
DA put current address in rdx
// rdi = *L$ (where the new entry goes), rsi = start of the line buffer
im
--L$
BA
lq
JA
im
--LI
IA
// copy from rsi to rdi until a newline is reached
::lc label copy
BI
zA
lb
BA
// store in rdi
AJ
xc
sb
CA put byte in rcx
// increment rdi,rsi
BJ
im
##1.
+B
JA
BI
im
##1.
+B
IA
BC
im
##a.
jn if byte we read wasn't a newline,
:-lc keep looping
// store address of label in rdi
AD
BJ
sd
// increment rdi by 4, because we stored a 4-byte number
im
##4.
+B
JA
// now set L$ to rdi
im
--L$
BA
AJ
sq
// read the next line
jm
:-rl
// label lookup--set rax to address of label in rbx
// in:  rbx = pointer to the label text, terminated by '\n'
// out: rax = the 4-byte address stored for that label, or 0 on the first pass
// if the label is not in the list, jumps to ::!l (bad label), which does not return.
// uses rbp, rcx, rdx, rsi and rdi as scratch.
::ll
RB put ptr to label in rbp
// if it's the first pass, just return 0
im
--2P
BA
zA
lb
BA
zA
je
:-r0
// okay, it's the second pass
im
##a.
CA terminator '\n'
// use rsi to keep track of position in label list
im
--LB
IA
::lL
// first, check if we've reached the end of the label list (rsi == *L$)
im
--L$
BA
lq
BI
je
:-!l bad label if we've reached the end
// compare the list entry at rsi with the wanted label (rdi is reset to its start each time)
JR
im
--s=
cl
BA
im
##1.
je
:-l=
// this isn't the label; advance
// (s= leaves rsi on the first byte that differed, so scan on from there to the newline)
::l\
zA
BI
lb
DA
// increment rsi
BI
im
##1.
+B
IA
// check if that byte we looked at was a newline
BD
im
##a.
jn
:-l\ if not, keep looping
// now we need to increment rsi by another 4 bytes, to skip over the address
BI
im
##4.
+B
IA
jm
:-lL
re not reached (it follows an unconditional jump)
::l=
// label found!
// first, increment rsi past newline:
BI
im
##1.
+B
IA
// then, read dword at rsi into rax
BI
zA
ld
// we're done!!
re
// set rax to 1/0 depending on whether rsi and rdi have the same string, up to the terminator in rcx.
// in:  rsi, rdi = pointers to the two strings; rcx = terminator byte
// out: rax = 1 if every byte up to and including the terminator matched, else 0
// rsi and rdi are advanced as bytes are compared; on return they point at the
// last byte examined (the terminator on a match, the differing byte otherwise).
// uses rbx and rdx as scratch.
::s=
// rdx = byte at rsi, rax = byte at rdi
BI
zA
lb
DA
BJ
zA
lb
BD
jn
:-r0 1st characters are not equal
// the bytes are equal; if they equal the terminator, the strings match
BC
je
:-r1 we reached the end of the string
// increment rsi, rdi
BI
im
##1.
+B
IA
BJ
im
##1.
+B
JA
jm
:-s= keep looping
2021-11-12 21:12:59 -05:00
// conditional jump handling
// reached from the line dispatcher when a line starts with '?'.
// NOTE(review): this is a stub -- the whole body is `cc`, which looks like a raw
// 0xcc byte (x86 int3 breakpoint); confirm, and implement before using '?' lines.
::?j
cc
// set A to register. takes rbx='0','A','B','C','D','I','J','R','S', outputs instruction to file
// dispatcher: compares the character in rbx with each supported name in turn and jumps
// to the matching emitter (::A0, ::AB, ...). the emitters finish through ::wr, whose `re`
// returns to whoever called ::Ar.
// 'A' needs no instruction, so it jumps straight to ::r0.
// any other character jumps to ::!r (bad register).
::Ar
im
##30. '0'
je
:-A0
im
##41. 'A'
je
:-r0 just return
im
##42. 'B'
je
:-AB
im
##43. 'C'
je
:-AC
im
##44. 'D'
je
:-AD
im
##49. 'I'
je
:-AI
im
##4a. 'J'
je
:-AJ
im
##52. 'R'
je
:-AR
im
##53. 'S'
je
:-AS
jm
:-!r
// emitters for "set A to ...". each one has the same shape:
//   - the label sits directly on a copy of the instruction to emit, so the label's
//     address is also the address of the instruction's bytes;
//   - rsi = that address, rdx = the instruction's length (2 for zA, 3 for the others);
//   - jump to ::wr, which writes those bytes to the output file and returns.
// the instruction under the label is executed on the way in, but only changes rax,
// which is overwritten by the following `im`.
// emit instruction for "set A to 0".
::A0
zA neat trick we can just put the instruction here; it doesn't screw anything up
im
--A0
IA
im
##2.
DA
jm
:-wr
// emit "set A to B"
::AB
AB
im
--AB
IA
im
##3.
DA
jm
:-wr
// emit "set A to C"
::AC
AC
im
--AC
IA
im
##3.
DA
jm
:-wr
// emit "set A to D"
::AD
AD
im
--AD
IA
im
##3.
DA
jm
:-wr
// emit "set A to I"
::AI
AI
im
--AI
IA
im
##3.
DA
jm
:-wr
// emit "set A to J"
::AJ
AJ
im
--AJ
IA
im
##3.
DA
jm
:-wr
// emit "set A to R"
::AR
AR
im
--AR
IA
im
##3.
DA
jm
:-wr
// emit "set A to S"
::AS
AS
im
--AS
IA
im
##3.
DA
jm
:-wr
// set B to register. takes rbx='A','B','C','D','I','J','R','S' outputs instruction to file
// dispatcher: compares the character in rbx with each supported name in turn and jumps
// to the matching emitter (::BA, ::BC, ...). the emitters finish through ::wr, whose `re`
// returns to whoever called ::Br.
// 'B' needs no instruction, so it jumps straight to ::r0.
// any other character jumps to ::!r (bad register).
::Br
im
##41. 'A'
je
:-BA
im
##42. 'B'
je
:-r0 just return
im
##43. 'C'
je
:-BC
im
##44. 'D'
je
:-BD
im
##49. 'I'
je
:-BI
im
##4a. 'J'
je
:-BJ
im
##52. 'R'
je
:-BR
im
##53. 'S'
je
:-BS
jm
:-!r
// emitters for "set B to ...". each label sits directly on a copy of the 3-byte
// instruction to emit; the code then sets rsi = the label's address, rdx = 3 and jumps
// to ::wr, which writes those bytes to the output file and returns.
// the instruction under the label is executed on the way in; it only changes rbx,
// which is not read again before ::wr returns.
// ::BA is also the target used by ::rA for register 'B'.
// emit "set B to A"
::BA
BA
im
--BA
IA
im
##3.
DA
jm
:-wr
// emit "set B to C"
::BC
BC
im
--BC
IA
im
##3.
DA
jm
:-wr
// emit "set B to D"
::BD
BD
im
--BD
IA
im
##3.
DA
jm
:-wr
// emit "set B to I"
::BI
BI
im
--BI
IA
im
##3.
DA
jm
:-wr
// emit "set B to J"
::BJ
BJ
im
--BJ
IA
im
##3.
DA
jm
:-wr
// emit "set B to R"
::BR
BR
im
--BR
IA
im
##3.
DA
jm
:-wr
// emit "set B to S"
::BS
BS
im
--BS
IA
im
##3.
DA
jm
:-wr
// set register to A. takes rbx='A','B','C','D','I','J','R','S' outputs instruction to file
// dispatcher: compares the character in rbx with each supported name in turn and jumps
// to the matching emitter (::BA, ::CA, ::DA, ...). the emitters finish through ::wr,
// whose `re` returns to whoever called ::rA.
// 'A' needs no instruction, so it jumps straight to ::r0.
// any other character jumps to ::!r (bad register).
::rA
im
##41. 'A'
je
:-r0 just return
im
##42. 'B'
je
:-BA
im
##43. 'C'
je
:-CA
im
##44. 'D'
je
:-DA
im
##49. 'I'
je
:-IA
im
##4a. 'J'
je
:-JA
im
##52. 'R'
je
:-RA
im
##53. 'S'
je
:-SA
jm
:-!r
// emitters for "set ... to A". each sets rsi = address of a copy of the 3-byte
// instruction, rdx = 3, and jumps to ::wr to write it to the output file.
// ::DA, ::IA and ::JA keep the instruction copy directly under the label (it is executed
// on the way in, and the register it changes is overwritten before ::wr uses it).
// ::CA, ::RA and ::SA instead keep the copy out of line at ::C), ::R) and ::S), after the
// jump to ::wr, so that it is never executed -- presumably because running it here would
// clobber rcx / rbp / rsp.
// emit "set C to A"
::CA
im
--C)
IA
im
##3.
DA
jm
:-wr
// data only: the bytes of the instruction, never executed in place
::C)
CA
// emit "set D to A"
::DA
DA
im
--DA
IA
im
##3.
DA
jm
:-wr
// emit "set I to A"
::IA
IA
im
--IA
IA
im
##3.
DA
jm
:-wr
// emit "set J to A"
::JA
JA
im
--JA
IA
im
##3.
DA
jm
:-wr
// emit "set R to A"
::RA
im
--R)
IA
im
##3.
DA
jm
:-wr
// data only: the bytes of the instruction, never executed in place
::R)
RA
// emit "set S to A"
::SA
im
--S)
IA
im
##3.
DA
jm
:-wr
// data only: the bytes of the instruction, never executed in place
::S)
SA
// write to output file from rsi..rsi+rdx
// in: rsi = pointer to the bytes, rdx = number of bytes
// the output file is hard-coded as fd 4. the syscall's result is left in rax
// and is not checked before returning.
::wr
im
##4.
JA
im
##1.
sy
re
2021-11-12 16:51:28 -05:00
// return 0
// shared tail: other routines jump here to return with rax = 0
::r0
zA
re
// return 1
// shared tail: other routines jump here to return with rax = 1
::r1
im
##1.
re
2021-11-10 21:22:25 -05:00
// exit with code in rax
// moves rax into rdi (first syscall argument) and invokes syscall 0x3c.
// reached by jump, not by call.
::ex
JA
im
##3c.
sy
2021-11-11 19:04:19 -05:00
// convert string representation of number starting at rbx and ending with a newline to number in rax
// the first character selects the base: 'd' = decimal, 'x' = hexadecimal (digits 0-9 and lowercase a-f).
// an unknown base character or an invalid digit jumps to ::!n (bad number), which does not return.
// uses rbx, rcx, rdx, rsi and rdi as scratch. overflow is not detected.
::nu
DB
im
##1.
+B
IA start by storing pointer to actual number (not including base) in rsi
BD
zA
lb
BA
im
##64. ascii 'd'
je
:-#d decimal
im
##78. ascii 'x'
je
:-#x hexadecimal
jm
:-!n unrecognized number base
// convert newline-terminated decimal representation in rsi to number in rax
::#d
zA
JA use rdi to store number
::dL decimal loop
BI
zA
lb
BA
im
##a.
je
:-d$ newline reached
im
##30.
jg
:-!n bad digit (<'0')
im
##39.
jl
:-!n bad digit (>'9')
im
##ffffffffffffffd0.
+B
CA put numerical value of digit in rcx
im
##a.
BA
AJ
+* multiply by 10
BC
+B add digit
JA
// increment rsi
BI
im
##1.
+B
IA
jm
:-dL keep looping
::d$
AJ
re return
// convert newline-terminated hexadecimal representation in rsi to number in rax
::#x
zA
JA use rdi to store number
::xL hexadecimal loop
BI
zA
lb
BA
im
##a.
je
:-x$ newline reached
im
##30. compare with ascii '0'
jg
:-!n bad if < '0'
im
##39.
jl
:-af probably a-f
im
##ffffffffffffffd0. -48
jm
:-hX
::af
im
##61. ASCII 'a'
jg
:-!n bad digit (not 0-9, and less than 'a')
im
##66. ASCII 'f'
jl
:-!n bad digit (not 0-9, and greater than 'f')
im
##ffffffffffffffa9. -87 (10 - 'a')
// here rax = the offset to add to the character still in rbx (-48 for 0-9, -87 for a-f)
::hX
+B
BA
// digit's numerical value now in rbx
// shift the running total left by 4 bits, then add the digit
AJ
<I
04
+B add digit
JA store away
// increment rsi
BI
im
##1.
+B
IA
jm
:-xL
::x$
AJ
re return
2021-11-11 19:04:19 -05:00
// error handlers. each one loads rsi = message, rdx = message length and jumps to ::er,
// which writes the message to stderr and exits with code 1. none of them return.
// bad number
::!n
im
--!N error message
IA
im
##b. length of error message
DA
jm
:-er
// bad label
::!l
im
--!L error message
IA
im
##a. length of error message
DA
jm
:-er
::!r bad register
im
--!R error message
IA
im
##d. length of error message
DA
jm
:-er
::er error -- write error message in rsi with length in rdx
im
##2. stderr
JA
im
##1. write
sy
im
##1. exit code 1
jm
:-ex
2021-11-11 12:54:20 -05:00
// end of file
// reached from the read loop when read() returns fewer than 1 byte.
// for now this just exits with code 0; the 2P flag is never set, so only
// the first pass ever runs.
::ef
// TODO: second pass
zA exit code 0
jm
:-ex
// one-byte flag: 0 on the first pass. ::ld and ::ll load it with a byte load and compare it with 0.
::2P second pass?
00
// NUL-terminated path passed to open() at program start
::IF input file name
'i
'n
'0
'3
00
// NUL-terminated path passed to open() at program start
::OF output file name
'o
'u
't
'0
'3
00
2021-11-12 21:12:59 -05:00
// error message strings. they are not NUL-terminated; each handler passes an explicit
// length (0xb, 0xa and 0xd respectively), so the lengths must be kept in step with the text.
::!N bad number error message
'B
'a
'd
20
'n
'u
'm
'b
'e
'r
\n
::!L bad label error message
'B
'a
'd
20
'l
'a
'b
'e
'l
\n
::!R bad register error message
'B
'a
'd
20
'r
'e
'g
'i
's
't
'e
'r
\n
2021-11-11 12:54:20 -05:00
// the read loop stores the current line here, newline included, with spaces and tabs removed.
// NOTE(review): only one `~~` is reserved and the read loop does not bound the line length -- confirm the maximum line size.
::LI line buffer
~~
// 8-byte pointer to the first free byte of the label list; starts out as the address of ::LB.
// ::ld advances it after appending an entry; ::ll stops scanning when it reaches this address.
::L$ end of current label list
--LB
// label list: entries of <text ending in '\n'><4-byte address>, appended by ::ld.
// the `~~` lines that follow reserve the space for it.
::LB labels
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~
~~