2022-01-11 15:55:37 -05:00
; this is the format of the executables we produce:
2022-01-27 18:52:39 -05:00
; elf header 4MB addresses 0x000000 - 0x400000 (no , it won't actually take up that much space)
; code 4MB addresses 0x400000 - 0x7fffff
2022-01-11 15:55:37 -05:00
; read - only data 4MB addresses 0x800000 - 0xbfffff
; read - write data 4MB addresses 0xc00000 - 0xffffff
2022-01-27 18:52:39 -05:00
; note that file offsets and runtime addresses are the same .
; you should be able to change these constants without breaking anything:
2022-01-11 15:55:37 -05:00
; first address (and file offset) of the read-only data segment
#define RODATA_ADDR 0x800000
2022-01-28 15:07:23 -05:00
; first address (and file offset) of the read-write data segment
#define RWDATA_ADDR 0xc00000
2022-01-27 18:52:39 -05:00
; one past the last read-write data address (the segment is 0xc00000 - 0xffffff)
#define RWDATA_END 0x1000000
; same value as RWDATA_END: file offsets equal runtime addresses, so the file spans the whole address range
#define EXECUTABLE_SIZE 0x1000000
2022-01-11 15:55:37 -05:00
2022-02-04 23:21:33 -05:00
; "* 15 nesting levels of compound statements , iteration control structures , and selection control structures" C89 § 2 . 2 . 4 . 1
; limit on block nesting depth; C89 only requires 15 levels to be supported.
; NOTE(review): how the limit is enforced (inclusive or exclusive) is not visible here.
#define BLOCK_DEPTH_LIMIT 16
2022-01-11 00:09:11 -05:00
; C OPERATOR PRECEDENCE
; lowest
; 1 ,
; 2 = + = - = *= /= %= << = >> = &= ^= |=
; 3 ? ... :
; 4 ||
; 5 &&
; 6 |
; 7 ^
; 8 &
; 9 == !=
; a < > < = > =
; b << >>
; c + -
; d * / %
2022-02-02 23:05:33 -05:00
; e casts , sizeof , unary prefixes ++ -- & * + - ~ !
2022-01-11 00:09:11 -05:00
; f . - > () [ ] postfix ++ --
; highest
; NB: for equal precedence , operators are applied left - to - right except for assignment operators (precedence 2)
2022-01-08 12:15:17 -05:00
2022-02-04 19:31:45 -05:00
; TOKENS
; tokens are 16 bytes and have the following format:
; uchar type
; uchar info
; ushort file
; uint line
; ulong data -- for int/float literals , the value; for string literals , the runtime address; for identifiers , the name of the identifier
2022-01-11 11:53:36 -05:00
; SYMBOL_* ids for punctuators/operators (200 - 245).
; the binary operators SYMBOL_COMMA .. SYMBOL_PERCENT are numbered in the same
; lowest-to-highest order as the precedence levels 1 - d of the C operator precedence table.
#define SYMBOL_COMMA 200
2022-01-14 13:41:44 -05:00
; NOTE: operator_right_associative and others require SYMBOL_EQ to be the first assignment operator
2022-01-11 11:53:36 -05:00
; assignment operators occupy the contiguous range SYMBOL_EQ (201) .. SYMBOL_OR_EQ (211)
#define SYMBOL_EQ 201
#define SYMBOL_PLUS_EQ 202
#define SYMBOL_MINUS_EQ 203
#define SYMBOL_TIMES_EQ 204
#define SYMBOL_DIV_EQ 205
#define SYMBOL_PERCENT_EQ 206
#define SYMBOL_LSHIFT_EQ 207
#define SYMBOL_RSHIFT_EQ 208
#define SYMBOL_AND_EQ 209
#define SYMBOL_XOR_EQ 210
#define SYMBOL_OR_EQ 211
2022-01-14 13:41:44 -05:00
; NOTE: operator_right_associative and others require SYMBOL_OR_EQ to be the last assignment operator
2022-01-11 11:53:36 -05:00
#define SYMBOL_QUESTION 212
#define SYMBOL_OR_OR 213
#define SYMBOL_AND_AND 214
#define SYMBOL_OR 215
#define SYMBOL_XOR 216
#define SYMBOL_AND 217
#define SYMBOL_EQ_EQ 218
#define SYMBOL_NOT_EQ 219
#define SYMBOL_LT 220
#define SYMBOL_GT 221
#define SYMBOL_LT_EQ 222
#define SYMBOL_GT_EQ 223
#define SYMBOL_LSHIFT 224
#define SYMBOL_RSHIFT 225
#define SYMBOL_PLUS 226
#define SYMBOL_MINUS 227
#define SYMBOL_TIMES 228
#define SYMBOL_DIV 229
#define SYMBOL_PERCENT 230
; the remaining symbols are not binary operators with a slot in the precedence table
#define SYMBOL_PLUS_PLUS 231
#define SYMBOL_MINUS_MINUS 232
#define SYMBOL_NOT 233
#define SYMBOL_TILDE 234
#define SYMBOL_ARROW 235
#define SYMBOL_DOTDOTDOT 236
#define SYMBOL_COLON 237
#define SYMBOL_LBRACE 238
#define SYMBOL_RBRACE 239
#define SYMBOL_LSQUARE 240
#define SYMBOL_RSQUARE 241
#define SYMBOL_LPAREN 242
#define SYMBOL_RPAREN 243
#define SYMBOL_SEMICOLON 244
2022-01-13 18:13:29 -05:00
#define SYMBOL_DOT 245
2022-01-11 00:09:11 -05:00
2022-02-04 19:31:45 -05:00
2022-01-11 00:09:11 -05:00
; TOKEN_* ids for tokens that are neither symbols nor keywords.
; these values (1 - 6) do not collide with KEYWORD_* (22 - 52) or SYMBOL_* (200 - 245);
; presumably all three families share the token's `type` byte -- confirm against the tokenizer.
#define TOKEN_IDENTIFIER 1
#define TOKEN_CONSTANT_FLOAT 2
#define TOKEN_CONSTANT_INT 3
#define TOKEN_CONSTANT_CHAR 4
2022-01-11 15:55:37 -05:00
#define TOKEN_STRING_LITERAL 5
2022-01-13 16:12:28 -05:00
#define TOKEN_EOF 6
2022-01-11 00:09:11 -05:00
2022-01-11 14:03:13 -05:00
; these are stored in the "info" field of the token
; (suffix of a numeric literal; presumably u, l, ul and f as in C -- the parsing code is outside this chunk)
#define NUMBER_NO_SUFFIX 0
#define NUMBER_SUFFIX_U 1
#define NUMBER_SUFFIX_L 2
#define NUMBER_SUFFIX_UL 3
#define NUMBER_SUFFIX_F 4
2022-01-11 00:09:11 -05:00
; KEYWORD_* ids (22 - 52). ids 21, 31, 37, 43 and 48 are intentionally left undefined:
; those keywords are not supported and their numbers are kept as gaps (see the commented-out lines).
; #define KEYWORD_AUTO 21 (@NONSTANDARD auto only exists in C for legacy reasons and doesn't appear in TCC's source code)
#define KEYWORD_DOUBLE 22
#define KEYWORD_INT 23
#define KEYWORD_STRUCT 24
#define KEYWORD_BREAK 25
#define KEYWORD_ELSE 26
#define KEYWORD_LONG 27
#define KEYWORD_SWITCH 28
#define KEYWORD_CASE 29
#define KEYWORD_ENUM 30
; #define KEYWORD_REGISTER 31 (we can just #define register)
#define KEYWORD_TYPEDEF 32
#define KEYWORD_CHAR 33
#define KEYWORD_EXTERN 34
#define KEYWORD_RETURN 35
#define KEYWORD_UNION 36
; #define KEYWORD_CONST 37 (we can just #define const)
#define KEYWORD_FLOAT 38
#define KEYWORD_SHORT 39
#define KEYWORD_UNSIGNED 40
#define KEYWORD_CONTINUE 41
#define KEYWORD_FOR 42
; #define KEYWORD_SIGNED 43 (again, just #define signed)
#define KEYWORD_VOID 44
#define KEYWORD_DEFAULT 45
#define KEYWORD_GOTO 46
#define KEYWORD_SIZEOF 47
; #define KEYWORD_VOLATILE 48 (just #define volatile if need be)
#define KEYWORD_DO 49
#define KEYWORD_IF 50
#define KEYWORD_STATIC 51
#define KEYWORD_WHILE 52
2022-01-13 16:12:28 -05:00
; the format of expression headers is:
; uchar kind (one of the constants below)
; uchar info
; ushort (padding)
2022-01-21 23:24:18 -05:00
; uint type
2022-01-13 16:12:28 -05:00
; immediately following the header in memory are the arguments of the expression
2022-02-02 09:49:56 -05:00
; - for global variables , the 64 - bit runtime address
2022-01-13 16:12:28 -05:00
; - for constant ints , the 64 - bit integral value
; - for constant floats , the 64 - bit double value (even if expression has type float)
; - for unary operators , the operand
2022-01-24 18:27:09 -05:00
; - for casts , the operand (type is given by type member)
2022-01-13 16:12:28 -05:00
; - for binary operators , the first operand followed by the second
2022-01-27 10:57:18 -05:00
; - for the operators . and - > , the first argument is the expression on the left - hand side , and the second argument is a 64 - bit offset .
; we could use a 32 - bit offset but that would cause things to be unaligned .
2022-01-13 16:12:28 -05:00
; - for the ternary operator ? : , the first followed by the second followed by the third
; - for function calls , the function , followed by each of the arguments to the function — info indicates the number of arguments
2022-02-02 11:57:11 -05:00
; File/line number are not stored in expressions .
; Note that string literals are stored as constant integers (you can check the type to know what it is)
2022-02-02 09:49:56 -05:00
; EXPRESSION_* values for the `kind` byte of an expression header (200 - 250).
; 203 is not assigned anywhere in this chunk.
#define EXPRESSION_GLOBAL_VARIABLE 200
2022-01-13 16:12:28 -05:00
#define EXPRESSION_CONSTANT_INT 201
#define EXPRESSION_CONSTANT_FLOAT 202
; postfix operators
#define EXPRESSION_SUBSCRIPT 204
#define EXPRESSION_CALL 205
#define EXPRESSION_DOT 206
#define EXPRESSION_ARROW 207
#define EXPRESSION_POST_INCREMENT 208
#define EXPRESSION_POST_DECREMENT 209
; unary prefix operators
#define EXPRESSION_PRE_INCREMENT 210
#define EXPRESSION_PRE_DECREMENT 211
#define EXPRESSION_ADDRESS_OF 212
#define EXPRESSION_DEREFERENCE 213
; unary plus gets its own kind because it matters for promotion: if x is a char, sizeof(+x) should be sizeof(int)
#define EXPRESSION_UNARY_PLUS 214
#define EXPRESSION_UNARY_MINUS 215
#define EXPRESSION_BITWISE_NOT 216
2022-01-14 10:08:02 -05:00
#define EXPRESSION_LOGICAL_NOT 217
2022-01-13 16:12:28 -05:00
#define EXPRESSION_SIZEOF 218
#define EXPRESSION_CAST 219
; binary operators
#define EXPRESSION_MUL 220
#define EXPRESSION_DIV 221
#define EXPRESSION_REMAINDER 222
#define EXPRESSION_ADD 223
#define EXPRESSION_SUB 224
#define EXPRESSION_LSHIFT 225
#define EXPRESSION_RSHIFT 226
#define EXPRESSION_LT 227
#define EXPRESSION_GT 228
#define EXPRESSION_LEQ 229
#define EXPRESSION_GEQ 230
#define EXPRESSION_EQ 231
#define EXPRESSION_NEQ 232
#define EXPRESSION_BITWISE_AND 233
#define EXPRESSION_BITWISE_XOR 234
#define EXPRESSION_BITWISE_OR 235
2022-01-13 20:29:52 -05:00
#define EXPRESSION_LOGICAL_AND 236
#define EXPRESSION_LOGICAL_OR 237
2022-01-13 16:12:28 -05:00
; the ternary operator, e.g. x == 5 ? 6 : 7
#define EXPRESSION_CONDITIONAL 238
; assignment operators, then the comma operator
#define EXPRESSION_ASSIGN 239
#define EXPRESSION_ASSIGN_ADD 240
#define EXPRESSION_ASSIGN_SUB 241
#define EXPRESSION_ASSIGN_MUL 242
#define EXPRESSION_ASSIGN_DIV 243
#define EXPRESSION_ASSIGN_REMAINDER 244
#define EXPRESSION_ASSIGN_LSHIFT 245
#define EXPRESSION_ASSIGN_RSHIFT 246
#define EXPRESSION_ASSIGN_AND 247
#define EXPRESSION_ASSIGN_XOR 248
#define EXPRESSION_ASSIGN_OR 249
#define EXPRESSION_COMMA 250
; TYPES: A type is a 4 - byte index into the global array `types` . Byte 0 in `types`
; is reserved , and bytes 1 - 16 contain the values 1 - 16 . Thus TYPE_INT , etc .
; can be used as types directly .
; The format of each type is as follows:
; char , unsigned char , etc . : TYPE_CHAR , TYPE_UNSIGNED_CHAR , etc . as a single byte
; pointer to type t: TYPE_PTR t
; array of n t's: TYPE_ARRAY {n as 8 bytes} t
2022-01-25 18:28:26 -05:00
; struct/union: TYPE_STRUCT {8 - byte pointer to struct/union data (see structures in main . b)}
; note: incomplete structs/unions are replaced with void .
2022-01-19 22:23:29 -05:00
; function: TYPE_FUNCTION {arg1 type} {arg2 type} ... {argn type} 0 {return type}
2022-01-24 20:53:37 -05:00
; note that enum types are just treated as ints .
2022-01-13 16:12:28 -05:00
; TYPE_* tag bytes. each fits in one byte; the value 2 is not assigned in this chunk.
#define TYPE_VOID 1
#define TYPE_CHAR 3
#define TYPE_UNSIGNED_CHAR 4
#define TYPE_SHORT 5
#define TYPE_UNSIGNED_SHORT 6
#define TYPE_INT 7
#define TYPE_UNSIGNED_INT 8
#define TYPE_LONG 9
2022-02-03 21:46:36 -05:00
#define TYPE_UNSIGNED_LONG 0xa
#define TYPE_FLOAT 0xb
2022-01-13 16:12:28 -05:00
; note that long double is treated the same as double.
2022-02-03 21:46:36 -05:00
#define TYPE_DOUBLE 0xc
; the tags below are followed by further data in `types` (pointee type, struct data pointer, array length, argument types)
#define TYPE_POINTER 0xd
#define TYPE_STRUCT 0xe
#define TYPE_ARRAY 0xf
#define TYPE_FUNCTION 0x10
; reading the first 16 bits of type data as a word will give this if the type refers to a function pointer.
; (low byte 0x0d = TYPE_POINTER, high byte 0x10 = TYPE_FUNCTION)
#define TYPE2_FUNCTION_POINTER 0x100d
2022-01-13 16:12:28 -05:00
; types will be initialized (in main) so that this refers to the type char*
#define TYPE_POINTER_TO_CHAR 20
2022-02-03 22:53:38 -05:00
; STATEMENTS
; In C , note that `if' , `while' , etc . always have a single statement as their body:
; if (x) { y; z; w; }
; here {y; z; w;} is a single `compound' statement containing three statements .
; our statements don't directly correspond to the C89 standard's notion of statements , in particular ,
; labels count as separate statements and declarations count as statements .
; each statement is stored as exactly 40 bytes
; uchar type
; uchar padding
; ushort file
; uint line
; ulong data1
; ulong data2
; ulong data3
; ulong data4
; a type of 0 indicates the end of the block .
; data layout for particular statements:
; - STATEMENT_EXPRESSION - data1 is a pointer to expression data; data2 , 3 , 4 are unused
; - STATEMENT_LOCAL_DECLARATION - declaring a local variable (automatic/"register" storage duration) , data1 = total bytes used by all local variables so far in this function including this one; data2 , 3 , 4 unused
; - STATEMENT_LABEL - data1 is a pointer to the name of the label; data2 , 3 , 4 are unused
; - STATEMENT_BLOCK - data1 is a pointer to an array of statements; data2 , 3 , 4 are unused
; - STATEMENT_IF - data1 is a pointer to the condition , data2 is a pointer to the `if' branch statement , data3 is a pointer to the `else' branch statement , or 0 if there is none; data4 is unused
; - STATEMENT_SWITCH - data1 is a pointer to the expression , data2 is a pointer to the body statement; data3 , 4 are unused
; - STATEMENT_WHILE - data1 is a pointer to the condition , data2 is a pointer to the body statement; data3 , 4 are unused
; - STATEMENT_DO - data1 is a pointer to the body statement , data2 is a pointer to the condition; data3 , 4 are unused
; - STATEMENT_FOR - data1 , 2 , 3 are pointers to the first , second , and third expressions inside parentheses , data4 is a pointer to the body statement
; - STATEMENT_GOTO - data1 is a pointer to the name of the label; data2 , 3 , 4 are unused
; - STATEMENT_CONTINUE - data1 , 2 , 3 , 4 are unused
; - STATEMENT_BREAK - data1 , 2 , 3 , 4 are unused
; - STATEMENT_RETURN - data1 is a pointer to the expression , or 0 if there is none; data2 , 3 , 4 are unused
2022-02-04 22:36:22 -05:00
; - STATEMENT_CASE - data1 is the value; data2 , 3 , 4 are unused
2022-02-03 22:53:38 -05:00
; STATEMENT_* values for the `type` byte of a statement.
; 0 is deliberately unused here: a type of 0 marks the end of a block.
#define STATEMENT_EXPRESSION 1
#define STATEMENT_LOCAL_DECLARATION 2
#define STATEMENT_LABEL 3
#define STATEMENT_BLOCK 4
#define STATEMENT_IF 5
#define STATEMENT_SWITCH 6
#define STATEMENT_WHILE 7
#define STATEMENT_DO 8
#define STATEMENT_FOR 9
#define STATEMENT_GOTO 0xa
#define STATEMENT_CONTINUE 0xb
#define STATEMENT_BREAK 0xc
#define STATEMENT_RETURN 0xd
2022-02-04 22:36:22 -05:00
#define STATEMENT_CASE 0xe
2022-02-03 22:53:38 -05:00
2022-01-11 11:53:36 -05:00
; keyword_table: maps the spelling of every symbol and keyword to its SYMBOL_*/KEYWORD_* id.
; each entry is:
;    byte <id>
;    the spelling (no embedded spaces -- it is the exact source text of the token)
;    byte 0 (terminator of the spelling)
; the table ends with a single byte 255 where the next id would be.
; NOTE(review): the lookup code is outside this chunk; entry order is preserved in case it matters.
:keyword_table
byte SYMBOL_SEMICOLON
; 59 is ASCII for the semicolon; it is written as a byte rather than with `string',
; presumably because a semicolon introduces comments in this language.
byte 59
byte 0
byte SYMBOL_EQ
string =
byte 0
byte SYMBOL_LBRACE
string {
byte 0
byte SYMBOL_RBRACE
string }
byte 0
byte SYMBOL_LSQUARE
string [
byte 0
byte SYMBOL_RSQUARE
string ]
byte 0
byte SYMBOL_LPAREN
string (
byte 0
byte SYMBOL_RPAREN
string )
byte 0
byte SYMBOL_COMMA
string ,
byte 0
byte SYMBOL_PLUS_EQ
string +=
byte 0
byte SYMBOL_MINUS_EQ
string -=
byte 0
byte SYMBOL_TIMES_EQ
string *=
byte 0
byte SYMBOL_DIV_EQ
string /=
byte 0
byte SYMBOL_PERCENT_EQ
string %=
byte 0
byte SYMBOL_LSHIFT_EQ
string <<=
byte 0
byte SYMBOL_RSHIFT_EQ
string >>=
byte 0
byte SYMBOL_AND_EQ
string &=
byte 0
byte SYMBOL_XOR_EQ
string ^=
byte 0
byte SYMBOL_OR_EQ
string |=
byte 0
byte SYMBOL_QUESTION
string ?
byte 0
byte SYMBOL_OR_OR
string ||
byte 0
byte SYMBOL_AND_AND
string &&
byte 0
byte SYMBOL_OR
string |
byte 0
byte SYMBOL_XOR
string ^
byte 0
byte SYMBOL_AND
string &
byte 0
byte SYMBOL_EQ_EQ
string ==
byte 0
byte SYMBOL_NOT_EQ
string !=
byte 0
byte SYMBOL_LT
string <
byte 0
byte SYMBOL_GT
string >
byte 0
byte SYMBOL_LT_EQ
string <=
byte 0
byte SYMBOL_GT_EQ
string >=
byte 0
byte SYMBOL_LSHIFT
string <<
byte 0
byte SYMBOL_RSHIFT
string >>
byte 0
byte SYMBOL_PLUS
string +
byte 0
byte SYMBOL_MINUS
string -
byte 0
byte SYMBOL_TIMES
string *
byte 0
byte SYMBOL_DIV
string /
byte 0
byte SYMBOL_PERCENT
string %
byte 0
byte SYMBOL_PLUS_PLUS
string ++
byte 0
byte SYMBOL_MINUS_MINUS
string --
byte 0
byte SYMBOL_NOT
string !
byte 0
byte SYMBOL_TILDE
string ~
byte 0
byte SYMBOL_ARROW
string ->
byte 0
byte SYMBOL_DOT
string .
byte 0
byte SYMBOL_DOTDOTDOT
string ...
byte 0
byte SYMBOL_COLON
string :
byte 0
byte KEYWORD_DOUBLE
string double
byte 0
byte KEYWORD_INT
string int
byte 0
byte KEYWORD_STRUCT
string struct
byte 0
byte KEYWORD_BREAK
string break
byte 0
byte KEYWORD_ELSE
string else
byte 0
byte KEYWORD_LONG
string long
byte 0
byte KEYWORD_SWITCH
string switch
byte 0
byte KEYWORD_CASE
string case
byte 0
byte KEYWORD_ENUM
string enum
byte 0
byte KEYWORD_TYPEDEF
string typedef
byte 0
byte KEYWORD_CHAR
string char
byte 0
byte KEYWORD_EXTERN
string extern
byte 0
byte KEYWORD_RETURN
string return
byte 0
byte KEYWORD_UNION
string union
byte 0
byte KEYWORD_FLOAT
string float
byte 0
byte KEYWORD_SHORT
string short
byte 0
byte KEYWORD_UNSIGNED
string unsigned
byte 0
byte KEYWORD_CONTINUE
string continue
byte 0
byte KEYWORD_FOR
string for
byte 0
byte KEYWORD_VOID
string void
byte 0
byte KEYWORD_DEFAULT
string default
byte 0
byte KEYWORD_GOTO
string goto
byte 0
byte KEYWORD_SIZEOF
string sizeof
byte 0
byte KEYWORD_DO
string do
byte 0
byte KEYWORD_IF
string if
byte 0
byte KEYWORD_STATIC
string static
byte 0
byte KEYWORD_WHILE
string while
byte 0
; end-of-table marker
byte 255
2022-02-04 22:36:22 -05:00
; NB: some of these are only used for nice debug output
2022-01-11 00:09:11 -05:00
; NUL-terminated error message; presumably reported when a ( has no matching ) -- the user is outside this chunk.
:str_missing_closing_paren
string Missing closing ).
byte 0
2022-01-08 12:15:17 -05:00
; NUL-terminated spellings of the comment delimiters and of every multi-character
; operator/punctuator. each spelling is the exact token text, with no embedded spaces.
:str_comment_start
string /*
byte 0
:str_comment_end
string */
byte 0
:str_lshift_eq
string <<=
byte 0
:str_rshift_eq
string >>=
byte 0
:str_eq_eq
string ==
byte 0
:str_not_eq
string !=
byte 0
:str_gt_eq
string >=
byte 0
:str_lt_eq
string <=
byte 0
:str_plus_plus
string ++
byte 0
:str_minus_minus
string --
byte 0
:str_plus_eq
string +=
byte 0
:str_minus_eq
string -=
byte 0
:str_times_eq
string *=
byte 0
:str_div_eq
string /=
byte 0
:str_percent_eq
string %=
byte 0
:str_and_eq
string &=
byte 0
:str_or_eq
string |=
byte 0
:str_xor_eq
string ^=
byte 0
:str_and_and
string &&
byte 0
:str_or_or
string ||
byte 0
:str_lshift
string <<
byte 0
:str_rshift
string >>
byte 0
:str_arrow
string ->
byte 0
:str_dotdotdot
string ...
byte 0
:str_hash_hash
string ##
byte 0
2022-01-11 00:09:11 -05:00
; NUL-terminated spellings of the single-character operators and punctuators.
:str_eq
string =
byte 0
:str_not
string !
byte 0
:str_tilde
string ~
byte 0
:str_lt
string <
byte 0
:str_gt
string >
byte 0
:str_and
string &
byte 0
:str_or
string |
byte 0
:str_xor
string ^
byte 0
:str_plus
string +
byte 0
:str_minus
string -
byte 0
:str_times
string *
byte 0
:str_div
string /
byte 0
:str_percent
string %
byte 0
:str_question
string ?
byte 0
:str_comma
string ,
byte 0
:str_colon
string :
byte 0
:str_semicolon
; 59 is ASCII for the semicolon, emitted as a raw byte instead of with `string'
byte 59
byte 0
:str_dot
string .
byte 0
:str_lparen
string (
byte 0
:str_rparen
string )
byte 0
:str_lsquare
string [
byte 0
:str_rsquare
string ]
byte 0
:str_lbrace
string {
byte 0
:str_rbrace
string }
byte 0
2022-01-08 14:37:39 -05:00
; NUL-terminated C preprocessor directive names, without the leading #.
; NOTE(review): presumably compared against the word following # by the preprocessor -- that code is outside this chunk.
:str_error
string error
byte 0
:str_define
string define
byte 0
2022-01-09 21:55:00 -05:00
:str_undef
string undef
byte 0
2022-01-09 22:33:33 -05:00
:str_pragma
string pragma
byte 0
:str_line
string line
byte 0
2022-01-08 14:37:39 -05:00
:str_include
string include
byte 0
:str_ifdef
string ifdef
byte 0
:str_ifndef
string ifndef
byte 0
:str_if
string if
byte 0
:str_elif
string elif
byte 0
:str_else
string else
byte 0
2022-01-10 18:04:53 -05:00
:str_endif
string endif
byte 0
2022-01-10 12:00:05 -05:00
; NUL-terminated names of the five predefined macros of standard C.
:str___FILE__
string __FILE__
byte 0
:str___LINE__
string __LINE__
byte 0
:str___DATE__
string __DATE__
byte 0
:str___TIME__
string __TIME__
byte 0
:str___STDC__
string __STDC__
byte 0
2022-01-13 16:12:28 -05:00
; NUL-terminated names of the basic types and of a few keywords.
; NOTE(review): likely among the strings used only for debug output -- confirm against the users of these labels.
:str_void
string void
byte 0
:str_char
string char
byte 0
:str_unsigned_char
string unsigned char
byte 0
:str_short
string short
byte 0
:str_unsigned_short
string unsigned short
byte 0
:str_int
string int
byte 0
:str_unsigned_int
string unsigned int
byte 0
:str_long
string long
byte 0
:str_unsigned_long
string unsigned long
byte 0
:str_float
string float
byte 0
:str_double
string double
byte 0
:str_struct
string struct
byte 0
:str_union
string union
byte 0
2022-02-04 19:31:45 -05:00
:str_typedef
2022-01-27 10:57:18 -05:00
string typedef
byte 0
2022-02-04 19:31:45 -05:00
:str_return
string return
byte 0
2022-02-04 22:36:22 -05:00
:str_goto
string goto
byte 0
:str_case
string case
byte 0