preprocessing tokens
This commit is contained in:
parent
d48816e226
commit
ac6cb985db
5 changed files with 452 additions and 28 deletions
|
@ -30,3 +30,79 @@
|
|||
#define KEYWORD_IF 130
|
||||
#define KEYWORD_STATIC 131
|
||||
#define KEYWORD_WHILE 132
|
||||
|
||||
:str_comment_start
|
||||
string /*
|
||||
byte 0
|
||||
:str_comment_end
|
||||
string */
|
||||
byte 0
|
||||
:str_lshift_eq
|
||||
string <<=
|
||||
byte 0
|
||||
:str_rshift_eq
|
||||
string >>=
|
||||
byte 0
|
||||
:str_eq_eq
|
||||
string ==
|
||||
byte 0
|
||||
:str_not_eq
|
||||
string !=
|
||||
byte 0
|
||||
:str_gt_eq
|
||||
string >=
|
||||
byte 0
|
||||
:str_lt_eq
|
||||
string <=
|
||||
byte 0
|
||||
:str_plus_plus
|
||||
string ++
|
||||
byte 0
|
||||
:str_minus_minus
|
||||
string --
|
||||
byte 0
|
||||
:str_plus_eq
|
||||
string +=
|
||||
byte 0
|
||||
:str_minus_eq
|
||||
string -=
|
||||
byte 0
|
||||
:str_times_eq
|
||||
string *=
|
||||
byte 0
|
||||
:str_div_eq
|
||||
string /=
|
||||
byte 0
|
||||
:str_remainder_eq
|
||||
string %=
|
||||
byte 0
|
||||
:str_and_eq
|
||||
string &=
|
||||
byte 0
|
||||
:str_or_eq
|
||||
string |=
|
||||
byte 0
|
||||
:str_xor_eq
|
||||
string ^=
|
||||
byte 0
|
||||
:str_and_and
|
||||
string &&
|
||||
byte 0
|
||||
:str_or_or
|
||||
string ||
|
||||
byte 0
|
||||
:str_lshift
|
||||
string <<
|
||||
byte 0
|
||||
:str_rshift
|
||||
string >>
|
||||
byte 0
|
||||
:str_arrow
|
||||
string ->
|
||||
byte 0
|
||||
:str_dotdotdot
|
||||
string ...
|
||||
byte 0
|
||||
:str_hash_hash
|
||||
string ##
|
||||
byte 0
|
||||
|
|
21
05/main.b
21
05/main.b
|
@ -8,6 +8,23 @@ byte 0
|
|||
byte 0
|
||||
goto main
|
||||
|
||||
function compile_error
|
||||
argument file
|
||||
argument line
|
||||
argument message
|
||||
fputs(2, file)
|
||||
fputc(2, ':)
|
||||
fputn(2, line)
|
||||
fputs(2, .str_error)
|
||||
fputs(2, message)
|
||||
fputc(2, 10)
|
||||
exit(1)
|
||||
|
||||
:str_error
|
||||
string : Error:
|
||||
byte 32
|
||||
byte 0
|
||||
|
||||
#include util.b
|
||||
#include constants.b
|
||||
#include preprocess.b
|
||||
|
@ -19,6 +36,7 @@ function main
|
|||
argument argc
|
||||
local input_filename
|
||||
local output_filename
|
||||
local pptokens
|
||||
|
||||
input_filename = .str_default_input_filename
|
||||
output_filename = .str_default_output_filename
|
||||
|
@ -27,7 +45,8 @@ function main
|
|||
input_filename = argv1
|
||||
output_filename = argv2
|
||||
:have_filenames
|
||||
split_into_preprocessing_tokens(input_filename)
|
||||
pptokens = split_into_preprocessing_tokens(input_filename)
|
||||
print_pptokens(pptokens)
|
||||
exit(0)
|
||||
|
||||
:usage_error
|
||||
|
|
20
05/main.c
20
05/main.c
|
@ -1,6 +1,14 @@
|
|||
test\
|
||||
ing/*
|
||||
I am */testing
|
||||
that this is working
|
||||
hello \
|
||||
there.
|
||||
#include <stdio.h>
|
||||
|
||||
int test(int, double, ...);\
|
||||
/* here is a nice
|
||||
comment it is
|
||||
here
|
||||
*/
|
||||
int main(void) {
|
||||
printf("\"Hello, world!%c\n\"", '\'');
|
||||
_X55 = Y4_C_;
|
||||
a.b = c;
|
||||
5 + (.3e+5+6) & 0xff | 93 -~5;
|
||||
return 0;
|
||||
}
|
||||
|
|
303
05/preprocess.b
303
05/preprocess.b
|
@ -6,10 +6,12 @@ function split_into_preprocessing_tokens
|
|||
local file_contents
|
||||
local pptokens
|
||||
local p
|
||||
local b
|
||||
local c
|
||||
local in
|
||||
local out
|
||||
local n
|
||||
local line_number
|
||||
|
||||
fd = open_r(filename)
|
||||
file_contents = malloc(2000000)
|
||||
|
@ -19,6 +21,7 @@ function split_into_preprocessing_tokens
|
|||
n = syscall(0, fd, p, 4096)
|
||||
if n == 0 goto pptokens_read_loop_end
|
||||
p += n
|
||||
goto pptokens_read_loop
|
||||
:pptokens_read_loop_end
|
||||
|
||||
; okay we read the file. first, delete every backslash-newline sequence (phase 2)
|
||||
|
@ -56,20 +59,304 @@ function split_into_preprocessing_tokens
|
|||
:backslashnewline_loop_end
|
||||
*1out = 0
|
||||
|
||||
; split file into preprocessing tokens, remove comments (phase 3)
|
||||
; we're still doing the trick with newlines, this time for ones inside comments
|
||||
; this is needed because the following is legal C:
|
||||
; #include/*
|
||||
; */<stdio.h>
|
||||
; and is not equivalent to:
|
||||
; #include
|
||||
; <stdio.h>
|
||||
newlines = 1
|
||||
in = file_contents
|
||||
out = pptokens
|
||||
line_number = 1
|
||||
:pptokens_loop
|
||||
c = *1in
|
||||
if c == 10 goto pptokens_newline_loop
|
||||
if c == 0 goto pptokens_loop_end
|
||||
if c == 32 goto pptoken_space
|
||||
if c == 9 goto pptoken_space
|
||||
b = isdigit(c)
|
||||
if b != 0 goto pptoken_number
|
||||
b = isalpha_or_underscore(c)
|
||||
if b != 0 goto pptoken_identifier
|
||||
b = str_startswith(in, .str_comment_start)
|
||||
if b != 0 goto pptoken_comment
|
||||
; now we check for all the various operators and symbols in C
|
||||
|
||||
fputs(1, file_contents)
|
||||
if c == 59 goto pptoken_single_character ; semicolon
|
||||
if c == '( goto pptoken_single_character
|
||||
if c == ') goto pptoken_single_character
|
||||
if c == '[ goto pptoken_single_character
|
||||
if c == '] goto pptoken_single_character
|
||||
if c == '{ goto pptoken_single_character
|
||||
if c == '} goto pptoken_single_character
|
||||
if c == ', goto pptoken_single_character
|
||||
if c == '~ goto pptoken_single_character
|
||||
if c == '? goto pptoken_single_character
|
||||
if c == ': goto pptoken_single_character
|
||||
if c == '" goto pptoken_string_or_char_literal
|
||||
if c == '' goto pptoken_string_or_char_literal
|
||||
b = str_startswith(in, .str_lshift_eq)
|
||||
if b != 0 goto pptoken_3_chars
|
||||
b = str_startswith(in, .str_rshift_eq)
|
||||
if b != 0 goto pptoken_3_chars
|
||||
b = str_startswith(in, .str_eq_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_not_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_gt_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_lt_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_plus_plus)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_minus_minus)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_plus_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_minus_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_times_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_div_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_remainder_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_and_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_or_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_xor_eq)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_and_and)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_or_or)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_lshift)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_rshift)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_arrow)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
b = str_startswith(in, .str_dotdotdot)
|
||||
if b != 0 goto pptoken_3_chars
|
||||
b = str_startswith(in, .str_hash_hash)
|
||||
if b != 0 goto pptoken_2_chars
|
||||
if c == '+ goto pptoken_single_character
|
||||
if c == '- goto pptoken_single_character
|
||||
if c == '* goto pptoken_single_character
|
||||
if c == '/ goto pptoken_single_character
|
||||
if c == '% goto pptoken_single_character
|
||||
if c == '& goto pptoken_single_character
|
||||
if c == '| goto pptoken_single_character
|
||||
if c == '^ goto pptoken_single_character
|
||||
if c == '> goto pptoken_single_character
|
||||
if c == '< goto pptoken_single_character
|
||||
if c == '! goto pptoken_single_character
|
||||
if c == '= goto pptoken_single_character
|
||||
if c == '# goto pptoken_single_character
|
||||
if c == '. goto pptoken_dot
|
||||
|
||||
goto bad_pptoken
|
||||
|
||||
:pptoken_comment
|
||||
; emit a space ("Each comment is replaced by one space character.")
|
||||
*1out = 32
|
||||
out += 1
|
||||
*1out = 0
|
||||
out += 1
|
||||
; skip over comment
|
||||
:pptoken_comment_loop
|
||||
b = str_startswith(in, .str_comment_end)
|
||||
if b != 0 goto pptoken_comment_loop_end
|
||||
c = *1in
|
||||
in += 1
|
||||
if c == 0 goto unterminated_comment
|
||||
if c == 10 goto pptoken_comment_newline
|
||||
goto pptoken_comment_loop
|
||||
:pptoken_comment_loop_end
|
||||
in += 2 ; skip */
|
||||
goto pptokens_loop
|
||||
:pptoken_comment_newline
|
||||
; keep line numbers correct
|
||||
newlines += 1
|
||||
goto pptoken_comment_loop
|
||||
:pptoken_dot
|
||||
; could just be a . or could be .3 -- we need to check if *(in+1) is a digit
|
||||
p = in + 1
|
||||
b = isdigit(*1p)
|
||||
if b != 0 goto pptoken_number
|
||||
; okay it's just a dot
|
||||
goto pptoken_single_character
|
||||
:pptoken_string_or_char_literal
|
||||
local delimiter
|
||||
local backslash
|
||||
delimiter = c
|
||||
backslash = 0
|
||||
*1out = c
|
||||
out += 1
|
||||
in += 1
|
||||
:pptoken_strchar_loop
|
||||
c = *1in
|
||||
*1out = c
|
||||
in += 1
|
||||
out += 1
|
||||
if c == '\ goto pptoken_strchar_backslash
|
||||
if c == 10 goto unterminated_string
|
||||
if c == 0 goto unterminated_string
|
||||
b = backslash
|
||||
backslash = 0
|
||||
if b == 1 goto pptoken_strchar_loop ; string can't end with an odd number of backslashes
|
||||
if c == delimiter goto pptoken_strchar_loop_end
|
||||
goto pptoken_strchar_loop
|
||||
:pptoken_strchar_backslash
|
||||
backslash ^= 1
|
||||
goto pptoken_strchar_loop
|
||||
:pptoken_strchar_loop_end
|
||||
*1out = 0
|
||||
out += 1
|
||||
goto pptokens_loop
|
||||
:pptoken_number
|
||||
c = *1in
|
||||
b = is_ppnumber_char(c)
|
||||
if b == 0 goto pptoken_number_end
|
||||
*1out = c
|
||||
out += 1
|
||||
in += 1
|
||||
if c == 'e goto pptoken_number_e
|
||||
if c == 'E goto pptoken_number_e
|
||||
goto pptoken_number
|
||||
:pptoken_number_e
|
||||
c = *1in
|
||||
if c == '+ goto pptoken_number_sign
|
||||
if c == '- goto pptoken_number_sign
|
||||
goto pptoken_number
|
||||
:pptoken_number_sign
|
||||
; special code to handle + - immediately following e
|
||||
*1out = c
|
||||
in += 1
|
||||
out += 1
|
||||
goto pptoken_number
|
||||
:pptoken_number_end
|
||||
*1out = 0
|
||||
out += 1
|
||||
goto pptokens_loop
|
||||
:pptoken_identifier
|
||||
c = *1in
|
||||
b = isalnum_or_underscore(c)
|
||||
if b == 0 goto pptoken_identifier_end
|
||||
*1out = c
|
||||
in += 1
|
||||
out += 1
|
||||
goto pptoken_identifier
|
||||
:pptoken_identifier_end
|
||||
*1out = 0
|
||||
out += 1
|
||||
goto pptokens_loop
|
||||
:pptoken_space
|
||||
; space character token
|
||||
*1out = 32
|
||||
in += 1
|
||||
out += 1
|
||||
*1out = 0
|
||||
out += 1
|
||||
goto pptokens_loop
|
||||
:pptoken_single_character
|
||||
; a single character preprocessing token, like {?}
|
||||
*1out = c
|
||||
in += 1
|
||||
out += 1
|
||||
*1out = 0
|
||||
out += 1
|
||||
goto pptokens_loop
|
||||
:pptoken_2_chars
|
||||
; two-character pptoken (e.g. ##)
|
||||
*1out = c
|
||||
in += 1
|
||||
out += 1
|
||||
*1out = *1in
|
||||
in += 1
|
||||
out += 1
|
||||
*1out = 0
|
||||
out += 1
|
||||
goto pptokens_loop
|
||||
:pptoken_3_chars
|
||||
; three-character pptoken (e.g. >>=)
|
||||
*1out = c
|
||||
in += 1
|
||||
out += 1
|
||||
*1out = *1in
|
||||
in += 1
|
||||
out += 1
|
||||
*1out = *1in
|
||||
in += 1
|
||||
out += 1
|
||||
*1out = 0
|
||||
out += 1
|
||||
goto pptokens_loop
|
||||
:pptokens_newline_loop
|
||||
if newlines == 0 goto pptokens_newline_loop_end
|
||||
; output a newline
|
||||
*1out = 10
|
||||
out += 1
|
||||
*1out = 0
|
||||
out += 1
|
||||
line_number += 1
|
||||
newlines -= 1
|
||||
goto pptokens_newline_loop
|
||||
:pptokens_newline_loop_end
|
||||
newlines = 1
|
||||
in += 1
|
||||
goto pptokens_loop
|
||||
:pptokens_loop_end
|
||||
|
||||
free(file_contents)
|
||||
close(fd)
|
||||
return
|
||||
return pptokens
|
||||
|
||||
:unterminated_comment
|
||||
fputs(2, .str_unterminated_comment)
|
||||
fputs(2, filename)
|
||||
fputc(2, 10)
|
||||
exit(1)
|
||||
compile_error(filename, line_number, .str_unterminated_comment)
|
||||
:str_unterminated_comment
|
||||
string Unterminated comment in file
|
||||
byte 32
|
||||
string Unterminated comment.
|
||||
byte 0
|
||||
:unterminated_string
|
||||
compile_error(filename, line_number, .str_unterminated_string)
|
||||
:str_unterminated_string
|
||||
string Unterminated string or character literal.
|
||||
byte 0
|
||||
:bad_pptoken
|
||||
compile_error(filename, line_number, .str_bad_pptoken)
|
||||
:str_bad_pptoken
|
||||
string Bad preprocessing token.
|
||||
byte 0
|
||||
|
||||
; can the given character appear in a C89 ppnumber?
|
||||
function is_ppnumber_char
|
||||
argument c
|
||||
if c == '. goto return_1
|
||||
if c < '0 goto return_0
|
||||
if c <= '9 goto return_1
|
||||
if c < 'A goto return_0
|
||||
if c <= 'Z goto return_1
|
||||
if c == '_ goto return_1
|
||||
if c < 'a goto return_0
|
||||
if c <= 'z goto return_1
|
||||
goto return_0
|
||||
|
||||
function print_pptokens
|
||||
argument pptokens
|
||||
local p
|
||||
p = pptokens
|
||||
:print_pptokens_loop
|
||||
if *1p == 0 goto print_pptokens_loop_end
|
||||
putc('{)
|
||||
puts(p)
|
||||
putc('})
|
||||
p += strlen(p)
|
||||
p += 1
|
||||
goto print_pptokens_loop
|
||||
:print_pptokens_loop_end
|
||||
putc(10)
|
||||
return
|
||||
|
|
58
05/util.b
58
05/util.b
|
@ -82,11 +82,9 @@ function memchr
|
|||
argument mem
|
||||
argument c
|
||||
local p
|
||||
local a
|
||||
p = mem
|
||||
:memchr_loop
|
||||
a = *1p
|
||||
if a == c goto memchr_loop_end
|
||||
if *1p == c goto memchr_loop_end
|
||||
p += 1
|
||||
goto memchr_loop
|
||||
:memchr_loop_end
|
||||
|
@ -94,12 +92,10 @@ function memchr
|
|||
|
||||
function strlen
|
||||
argument s
|
||||
local c
|
||||
local p
|
||||
p = s
|
||||
:strlen_loop
|
||||
c = *1p
|
||||
if c == 0 goto strlen_loop_end
|
||||
if *1p == 0 goto strlen_loop_end
|
||||
p += 1
|
||||
goto strlen_loop
|
||||
:strlen_loop_end
|
||||
|
@ -165,9 +161,7 @@ function fputn
|
|||
function fputc
|
||||
argument fd
|
||||
argument c
|
||||
local p
|
||||
p = &c
|
||||
syscall(1, fd, p, 1)
|
||||
syscall(1, fd, &c, 1)
|
||||
return
|
||||
|
||||
function putc
|
||||
|
@ -179,10 +173,8 @@ function putc
|
|||
function fgetc
|
||||
argument fd
|
||||
local c
|
||||
local p
|
||||
c = 0
|
||||
p = &c
|
||||
syscall(0, fd, p, 1)
|
||||
syscall(0, fd, &c, 1)
|
||||
return c
|
||||
|
||||
; read a line from fd as a null-terminated string
|
||||
|
@ -251,6 +243,48 @@ function isupper
|
|||
if c <= 'Z goto return_1
|
||||
goto return_0
|
||||
|
||||
function islower
|
||||
argument c
|
||||
if c < 'a goto return_0
|
||||
if c <= 'z goto return_1
|
||||
goto return_0
|
||||
|
||||
function isdigit
|
||||
argument c
|
||||
if c < '0 goto return_0
|
||||
if c <= '9 goto return_1
|
||||
goto return_0
|
||||
|
||||
function isalpha
|
||||
argument c
|
||||
if c < 'A goto return_0
|
||||
if c <= 'Z goto return_1
|
||||
if c < 'a goto return_0
|
||||
if c <= 'z goto return_1
|
||||
goto return_0
|
||||
|
||||
; characters which can start identifiers in C
|
||||
function isalpha_or_underscore
|
||||
argument c
|
||||
if c < 'A goto return_0
|
||||
if c <= 'Z goto return_1
|
||||
if c == '_ goto return_1
|
||||
if c < 'a goto return_0
|
||||
if c <= 'z goto return_1
|
||||
goto return_0
|
||||
|
||||
; characters which can appear in identifiers in C
|
||||
function isalnum_or_underscore
|
||||
argument c
|
||||
if c < '0 goto return_0
|
||||
if c <= '9 goto return_1
|
||||
if c < 'A goto return_0
|
||||
if c <= 'Z goto return_1
|
||||
if c == '_ goto return_1
|
||||
if c < 'a goto return_0
|
||||
if c <= 'z goto return_1
|
||||
goto return_0
|
||||
|
||||
function exit
|
||||
argument status_code
|
||||
syscall(0x3c, status_code)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue