start C compiler

This commit is contained in:
pommicket 2022-01-07 23:32:27 -05:00
parent 262824b214
commit 5d6b490cce
7 changed files with 528 additions and 0 deletions

1
05/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
in04

11
05/Makefile Normal file
View file

@ -0,0 +1,11 @@
# Default target: build out04.
all: out04
# Run ../04a/out04 with main.b as input and in04 as output.
# Any change to a .b file in this directory triggers a rebuild.
in04: *.b ../04a/out04
../04a/out04 main.b in04
# Run ../04/out03 with in04 as input and out04 as output.
out04: in04 ../04/out03
../04/out03 in04 out04
# Convert a Markdown file to HTML using the ../markdown program.
%.html: %.md ../markdown
../markdown $<
# Run out04 on a .c file, passing the matching .out name as the second argument.
%.out: %.c
./out04 $< $@
# Remove generated files. NOTE(review): in04 is generated too (it is listed in
# .gitignore) but is not matched by any of these patterns.
clean:
rm -f out* README.html *.out

32
05/constants.b Normal file
View file

@ -0,0 +1,32 @@
; One numeric constant per C keyword, numbered 101 to 132.
; (Presumably these identify keyword tokens; confirm against the tokenizer once it exists.)
; Keywords that are deliberately not supported stay here as commented-out lines,
; so their numbers remain reserved and the other values do not shift.
; #define KEYWORD_AUTO 101 (auto only exists in C for legacy reasons and doesn't appear in TCC's source code)
#define KEYWORD_DOUBLE 102
#define KEYWORD_INT 103
#define KEYWORD_STRUCT 104
#define KEYWORD_BREAK 105
#define KEYWORD_ELSE 106
#define KEYWORD_LONG 107
#define KEYWORD_SWITCH 108
#define KEYWORD_CASE 109
#define KEYWORD_ENUM 110
#define KEYWORD_REGISTER 111
#define KEYWORD_TYPEDEF 112
#define KEYWORD_CHAR 113
#define KEYWORD_EXTERN 114
#define KEYWORD_RETURN 115
#define KEYWORD_UNION 116
; #define KEYWORD_CONST 117 (we can just #define const)
#define KEYWORD_FLOAT 118
#define KEYWORD_SHORT 119
#define KEYWORD_UNSIGNED 120
#define KEYWORD_CONTINUE 121
#define KEYWORD_FOR 122
; #define KEYWORD_SIGNED 123 (again, just #define signed)
#define KEYWORD_VOID 124
#define KEYWORD_DEFAULT 125
#define KEYWORD_GOTO 126
#define KEYWORD_SIZEOF 127
; #define KEYWORD_VOLATILE 128 (just #define volatile if need be)
#define KEYWORD_DO 129
#define KEYWORD_IF 130
#define KEYWORD_STATIC 131
#define KEYWORD_WHILE 132

46
05/main.b Normal file
View file

@ -0,0 +1,46 @@
; add 24 + 16 = 40 to the stack pointer to put argc, argv in the right place
; the seven bytes below are the x86-64 instruction  add rsp, 40
; (48 81 c4 followed by the 32-bit little-endian immediate 40)
byte 0x48
byte 0x81
byte 0xc4
byte 40
byte 0
byte 0
byte 0
; jump over the included files and start executing at function main
goto main
#include util.b
#include constants.b
#include preprocess.b
; program entry point.
; with no command-line arguments (argc == 1) the default file names main.c and
; a.out are used; with exactly two arguments (argc == 3) argv1 is the input file
; and argv2 is the output file. any other argument count prints a usage message
; to file descriptor 2 and exits with status 1.
; the arguments are declared last-to-first: argv2, argv1, argv0, argc.
; at present only the input file is processed; output_filename is not used yet.
function main
argument argv2
argument argv1
argument argv0
argument argc
local input_filename
local output_filename
input_filename = .str_default_input_filename
output_filename = .str_default_output_filename
; argv1 and argv2 are only meaningful when argc is 3, so the defaults stay in place otherwise
if argc == 1 goto have_filenames
if argc != 3 goto usage_error
input_filename = argv1
output_filename = argv2
:have_filenames
split_into_preprocessing_tokens(input_filename)
exit(0)
:usage_error
fputs(2, .str_usage_error)
exit(1)
:str_usage_error
string Please either specify no arguments or an input and output file.
; the message needs its own newline and null terminator; without them fputs
; keeps going into the bytes of the next string
byte 10
byte 0
:str_default_input_filename
string main.c
byte 0
:str_default_output_filename
string a.out
byte 0

6
05/main.c Normal file
View file

@ -0,0 +1,6 @@
test\
ing/*
I am */testing
that this is working
hello \
there.

75
05/preprocess.b Normal file
View file

@ -0,0 +1,75 @@
; reads the C source file `filename` and begins translation phases 1-3 of the C89 standard.
; the goal is to return a string of null character-separated preprocessing tokens.
; so far only the backslash-newline deletion of phase 2 is implemented: the
; spliced text is written to standard output and nothing is returned.
; exits with status 1 if the file cannot be opened or read.
function split_into_preprocessing_tokens
argument filename
local fd
local file_contents
local pptokens
local p
local c
local in
local out
local n
fd = open_r(filename)
file_contents = malloc(2000000)
; reserved for the token output; nothing is stored in it yet
pptokens = malloc(2000000)
p = file_contents
; read the whole file, up to 4096 bytes per call, until read reports end of file (0)
; NOTE: there is no check that the file fits in the 2000000-byte buffer
:pptokens_read_loop
n = syscall(0, fd, p, 4096)
if n == 0 goto pptokens_read_loop_end
; a negative result is an error code; adding it to p would walk backwards forever
if n < 0 goto pptokens_read_error
p += n
goto pptokens_read_loop
:pptokens_read_loop_end
; okay we read the file. first, delete every backslash-newline sequence (phase 2)
; in reads the original text and out writes the spliced text into the same buffer.
; out never gets ahead of in, so no unread byte is overwritten.
; the scan stops at the first zero byte, which relies on the buffer being zero
; after the file data (malloc here maps fresh memory rather than reusing old blocks)
local newlines ; we add more newlines to keep line numbers right
newlines = 1
in = file_contents
out = file_contents
:backslashnewline_loop
c = *1in
if c == 0 goto backslashnewline_loop_end
if c == 10 goto proper_newline_loop
if c != '\ goto not_backslashnewline
; look one byte ahead: a backslash only counts when a newline follows it directly
p = in + 1
c = *1p
if c != 10 goto not_backslashnewline
in += 2 ; skip backslash and newline
newlines += 1 ; add one additional newline the next time around to compensate
goto backslashnewline_loop
:not_backslashnewline
; ordinary character: copy it through unchanged
*1out = *1in
out += 1
in += 1
goto backslashnewline_loop
:proper_newline_loop
; a real newline: emit it plus one newline for every splice since the previous one
if newlines == 0 goto proper_newline_loop_end
; output a newline
*1out = 10
out += 1
newlines -= 1
goto proper_newline_loop
:proper_newline_loop_end
newlines = 1
in += 1
goto backslashnewline_loop
:backslashnewline_loop_end
*1out = 0
in = file_contents
fputs(1, file_contents)
free(file_contents)
close(fd)
return
:pptokens_read_error
fputs(2, .str_pptokens_read_error)
fputs(2, filename)
fputc(2, 10)
exit(1)
; nothing jumps to this label yet
:unterminated_comment
fputs(2, .str_unterminated_comment)
fputs(2, filename)
fputc(2, 10)
exit(1)
:str_pptokens_read_error
string Error reading file:
byte 32
byte 0
:str_unterminated_comment
string Unterminated comment in file
byte 32
byte 0

357
05/util.b Normal file
View file

@ -0,0 +1,357 @@
; prints "Error opening file: " followed by name and a newline to file
; descriptor 2, then exits with status 1. does not return.
function file_error
argument name
fputs(2, .str_file_error)
fputs(2, name)
fputc(2, 10)
exit(1)
:str_file_error
string Error opening file:
; trailing space (character 32) and null terminator
byte 32
byte 0
; allocates size bytes and returns a pointer to them.
; each allocation is its own mapping made with syscall 9 (mmap on x86-64 Linux).
; 8 extra bytes are requested in front of the returned pointer and hold the
; total mapping size, which free reads back.
; on failure prints "Out of memory." to file descriptor 2 and exits with status 1.
function malloc
argument size
local total_size
local memory
total_size = size + 8
; mmap(addr 0, length total_size, prot 3 = read+write, flags 0x22 = private+anonymous, fd -1, offset 0)
memory = syscall(9, 0, total_size, 3, 0x22, -1, 0)
; results above 0xffffffffffff0000 are treated as error codes
; (] appears to be an unsigned greater-than comparison; confirm against the language reference)
if memory ] 0xffffffffffff0000 goto malloc_failed
*8memory = total_size
return memory + 8
:malloc_failed
fputs(2, .str_out_of_memory)
exit(1)
:str_out_of_memory
string Out of memory.
byte 10
byte 0
; releases memory previously returned by malloc.
; the total size is read from the 8 bytes just before the pointer (stored there
; by malloc) and the whole mapping is released with syscall 11 (munmap on x86-64 Linux).
function free
argument memory
local psize
local size
psize = memory - 8
size = *8psize
syscall(11, psize, size)
return
; returns a pointer to a null-terminated string containing the number given
; the digits are written right to left into the 32-byte global itos_string,
; so every call overwrites the result of the previous one.
; NOTE(review): no sign handling is visible here; behaviour for negative x
; depends on how % and / treat negative values, so confirm before relying on it.
function itos
global 32 itos_string
argument x
local c
local p
p = &itos_string
; start at byte 30; byte 31 is never written here and acts as the terminator
; (assumes globals start out zeroed; confirm)
p += 30
:itos_loop
c = x % 10
c += '0
*1p = c
x /= 10
if x == 0 goto itos_loop_end
p -= 1
goto itos_loop
:itos_loop_end
return p
; parses the run of decimal digits at the beginning of s and returns its value
; parsing stops at the first character outside '0 through '9; if s does not
; begin with a digit the result is 0
function stoi
argument s
local value
local cursor
local digit
cursor = s
value = 0
:stoi_loop
digit = *1cursor
if digit > '9 goto stoi_loop_end
if digit < '0 goto stoi_loop_end
; turn the character into its numeric value, then append it to the total
digit -= '0
value *= 10
value += digit
cursor += 1
goto stoi_loop
:stoi_loop_end
return value
; returns a pointer to the first byte equal to c at or after mem.
; NOTE: unlike C's memchr there is no length argument. the scan only stops when
; a matching byte is found, so the caller must know that one exists.
function memchr
argument mem
argument c
local p
local a
p = mem
:memchr_loop
a = *1p
if a == c goto memchr_loop_end
p += 1
goto memchr_loop
:memchr_loop_end
return p
; returns the number of bytes in s that come before its null terminator
function strlen
argument s
local q
local ch
q = s
goto strlen_test
:strlen_advance
q += 1
:strlen_test
ch = *1q
if ch != 0 goto strlen_advance
; q now points at the terminator
return q - s
; copies the null-terminated string src, including its terminator, to dest.
; returns a pointer to the terminator written in dest (not dest itself), which
; is convenient for appending another string right after.
function strcpy
argument dest
argument src
local p
local q
local c
p = dest
q = src
:strcpy_loop
c = *1q
*1p = c
; the terminator is copied before the test, so dest always ends up terminated
if c == 0 goto strcpy_loop_end
p += 1
q += 1
goto strcpy_loop
:strcpy_loop_end
return p
; returns 1 if the string s begins with the string prefix, 0 otherwise.
; an empty prefix always matches.
; return_0 and return_1 are shared labels defined right after function exit.
function str_startswith
argument s
argument prefix
local p
local q
local c1
local c2
p = s
q = prefix
:str_startswith_loop
c1 = *1p
c2 = *1q
; reaching the end of prefix means every character matched
if c2 == 0 goto return_1
; this also covers s ending first, because its terminator differs from c2
if c1 != c2 goto return_0
p += 1
q += 1
goto str_startswith_loop
; writes the null-terminated string s (without its terminator) to file descriptor fd
; using syscall 1 (write on x86-64 Linux). no newline is added and the result
; of the write is not checked.
function fputs
argument fd
argument s
local length
length = strlen(s)
syscall(1, fd, s, length)
return
; writes the null-terminated string s to file descriptor 1.
; unlike C's puts, no newline is added.
function puts
argument s
fputs(1, s)
return
; writes the number n in decimal to file descriptor fd (converted with itos)
function fputn
argument fd
argument n
local s
s = itos(n)
fputs(fd, s)
return
; writes the single character c to file descriptor fd
function fputc
argument fd
argument c
local p
; write needs an address, so pass the address of the argument itself and write 1 byte from it
p = &c
syscall(1, fd, p, 1)
return
; writes the single character c to file descriptor 1
function putc
argument c
fputc(1, c)
return
; reads one byte from file descriptor fd and returns it
; returns 0 at end of file
; NOTE: a zero byte in the input is indistinguishable from end of file
function fgetc
argument fd
local c
local p
; c starts at 0 so that 0 is returned when the read stores nothing
c = 0
p = &c
syscall(0, fd, p, 1)
return c
; read a line from fd as a null-terminated string
; returns 0 at end of file, 1 otherwise
; the newline itself is not stored in buf.
; NOTE: a final line with no trailing newline is stored in buf but still returns 0.
; NOTE: when size characters arrive before a newline, the last one read is
; overwritten by the terminator and lost; the rest of the line is left for the next call.
function fgets
argument fd
argument buf
argument size
local p
local end
local c
p = buf
end = buf + size
:fgets_loop
c = fgetc(fd)
if c == 0 goto fgets_eof
if c == 10 goto fgets_eol
*1p = c
p += 1
if p == end goto fgets_eob
goto fgets_loop
:fgets_eol ; end of line
*1p = 0
return 1
:fgets_eof ; end of file
*1p = 0
return 0
:fgets_eob ; end of buffer
; step back so the terminator lands inside the buffer
p -= 1
*1p = 0
return 1
; open the given file for reading
; returns the file descriptor from syscall 2 (open on x86-64 Linux) with flags 0 (read-only).
; on failure calls file_error, which prints a message and exits with status 1.
function open_r
argument filename
local fd
fd = syscall(2, filename, 0)
if fd < 0 goto open_r_error
return fd
:open_r_error
file_error(filename)
; not reached in practice, because file_error exits
return -1
; open the given file for writing with the given mode
; flags 0x241 are write-only (0x1), create (0x40) and truncate (0x200) on Linux;
; mode gives the permission bits used when the file is created.
; returns the file descriptor. on failure calls file_error, which prints a
; message and exits with status 1.
function open_w
argument filename
argument mode
local fd
fd = syscall(2, filename, 0x241, mode)
if fd < 0 goto open_w_error
return fd
:open_w_error
file_error(filename)
; not reached in practice, because file_error exits
return -1
; closes file descriptor fd using syscall 3 (close on x86-64 Linux); the result is not checked
function close
argument fd
syscall(3, fd)
return
; returns 1 when c lies between 'A and 'Z inclusive, 0 otherwise
; (return_0 and return_1 are the shared labels that follow function exit)
function isupper
argument c
; reject anything outside the range on either side; whatever is left is upper case
if c < 'A goto return_0
if c > 'Z goto return_0
goto return_1
; ends the program with the given status code using syscall 0x3c (exit on x86-64 Linux).
; does not return.
function exit
argument status_code
syscall(0x3c, status_code)
; shared return points: str_startswith and isupper jump here to return a
; constant 0 or 1. they sit after exit because exit never falls through into them.
:return_0
return 0
:return_1
return 1
; performs a raw x86-64 Linux system call.
; the first argument is the system call number; up to six more arguments follow.
; there are no argument declarations: the hand-written machine code below reads
; the values straight from the stack frame at [rbp-8] through [rbp-56].
; the number goes into rax and the others into rdi, rsi, rdx, r10, r8, r9.
; rax is used as a scratch register for every load, so the number is loaded last.
; callers use the value left in rax by the syscall instruction as the result
; (the bare return below apparently leaves rax untouched; confirm against the language reference).
function syscall
; I've done some testing, and this should be okay even if
; rbp-56 goes beyond the end of the stack.
; mov rax, [rbp-16]
byte 0x48
byte 0x8b
byte 0x85
byte 0xf0
byte 0xff
byte 0xff
byte 0xff
; mov rdi, rax
byte 0x48
byte 0x89
byte 0xc7
; mov rax, [rbp-24]
byte 0x48
byte 0x8b
byte 0x85
byte 0xe8
byte 0xff
byte 0xff
byte 0xff
; mov rsi, rax
byte 0x48
byte 0x89
byte 0xc6
; mov rax, [rbp-32]
byte 0x48
byte 0x8b
byte 0x85
byte 0xe0
byte 0xff
byte 0xff
byte 0xff
; mov rdx, rax
byte 0x48
byte 0x89
byte 0xc2
; mov rax, [rbp-40]
byte 0x48
byte 0x8b
byte 0x85
byte 0xd8
byte 0xff
byte 0xff
byte 0xff
; mov r10, rax
byte 0x49
byte 0x89
byte 0xc2
; mov rax, [rbp-48]
byte 0x48
byte 0x8b
byte 0x85
byte 0xd0
byte 0xff
byte 0xff
byte 0xff
; mov r8, rax
byte 0x49
byte 0x89
byte 0xc0
; mov rax, [rbp-56]
byte 0x48
byte 0x8b
byte 0x85
byte 0xc8
byte 0xff
byte 0xff
byte 0xff
; mov r9, rax
byte 0x49
byte 0x89
byte 0xc1
; mov rax, [rbp-8]
byte 0x48
byte 0x8b
byte 0x85
byte 0xf8
byte 0xff
byte 0xff
byte 0xff
; syscall
byte 0x0f
byte 0x05
return