instruction table, remove old instructions
This commit is contained in:
parent
e52793324a
commit
f82581ed76
5 changed files with 90 additions and 156 deletions
|
@ -33,8 +33,6 @@ sw - mov word [rbx], ax
|
||||||
lw - mov ax, word [rbx]
|
lw - mov ax, word [rbx]
|
||||||
sb - mov byte [rbx], al
|
sb - mov byte [rbx], al
|
||||||
lb - mov al, byte [rbx]
|
lb - mov al, byte [rbx]
|
||||||
Sq - mov qword [rsp], rax
|
|
||||||
Lq - mov rax, qword [rsp]
|
|
||||||
|
|
||||||
nA - neg rax
|
nA - neg rax
|
||||||
+B - add rax, rbx
|
+B - add rax, rbx
|
||||||
|
|
4
01/in00
4
01/in00
|
@ -10110,7 +10110,7 @@ cc cc cc cc cc cc cc cc
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
04 48 8b 04 24 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
|
@ -11006,7 +11006,7 @@ cc cc cc cc cc cc cc cc
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
04 48 89 04 24 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
00 00 00 00 00 00 00 00
|
00 00 00 00 00 00 00 00
|
||||||
|
|
4
02/in01
4
02/in01
|
@ -10328,7 +10328,7 @@ the formatting changed appropriately.
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;04;48;8b;04;24;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
|
@ -11224,7 +11224,7 @@ the formatting changed appropriately.
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;04;48;89;04;24;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
;00;00;00;00;00;00;00;00
|
;00;00;00;00;00;00;00;00
|
||||||
|
|
100
README.md
100
README.md
|
@ -3,7 +3,7 @@
|
||||||
Compilers nowadays are written in languages like C, which themselves need to be
|
Compilers nowadays are written in languages like C, which themselves need to be
|
||||||
compiled. But then, you need a C compiler to compile your C compiler! Of course,
|
compiled. But then, you need a C compiler to compile your C compiler! Of course,
|
||||||
the very first C compiler was not written in C (because how would it be
|
the very first C compiler was not written in C (because how would it be
|
||||||
compiled?). Instead, it was built up over time, starting from a very basic
|
compiled?). Instead, it was built up over time, starting from a basic
|
||||||
assembler, eventually reaching a full-scale compiler.
|
assembler, eventually reaching a full-scale compiler.
|
||||||
In this repository, we'll explore how that's done. Each directory
|
In this repository, we'll explore how that's done. Each directory
|
||||||
represents a new "stage" in the process. The first one, `00`, is a hand-written
|
represents a new "stage" in the process. The first one, `00`, is a hand-written
|
||||||
|
@ -37,9 +37,9 @@ want to know before starting.
|
||||||
You don't need to understand everything about each of these, just get
|
You don't need to understand everything about each of these, just get
|
||||||
a general idea:
|
a general idea:
|
||||||
|
|
||||||
|
- the basics of programming
|
||||||
- what a system call is
|
- what a system call is
|
||||||
- what memory is
|
- what memory is
|
||||||
- what a programming language is
|
|
||||||
- what a compiler is
|
- what a compiler is
|
||||||
- what an executable file is
|
- what an executable file is
|
||||||
- number bases -- if a number is preceded by 0x, 0o, or 0b in this series, that
|
- number bases -- if a number is preceded by 0x, 0o, or 0b in this series, that
|
||||||
|
@ -55,19 +55,9 @@ decimal.
|
||||||
- ASCII, null-terminated strings
|
- ASCII, null-terminated strings
|
||||||
- how pointers work
|
- how pointers work
|
||||||
- how floating-point numbers work
|
- how floating-point numbers work
|
||||||
- some basic Intel-style x86-64 assembly
|
|
||||||
|
|
||||||
It will help you a lot to know how to program (with any programming language),
|
If you aren't familiar with x86-64 assembly, be sure to check out the instruction list
|
||||||
but it's not strictly necessary.
|
below.
|
||||||
|
|
||||||
## instruction set
|
|
||||||
|
|
||||||
x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
|
|
||||||
long! So it makes sense to select only a small subset of it to use.
|
|
||||||
The set I've chosen can be found in `instructions.txt`.
|
|
||||||
I think it achieves a pretty good balance between having few enough
|
|
||||||
instructions to be manageable and having enough instructions to be useable.
|
|
||||||
To be clear, you don't need to read that file to understand the series.
|
|
||||||
|
|
||||||
## principles
|
## principles
|
||||||
|
|
||||||
|
@ -103,6 +93,88 @@ an operating system to a USB key with a circuit or something,
|
||||||
assuming you trust your CPU...
|
assuming you trust your CPU...
|
||||||
I'll leave that to someone else.
|
I'll leave that to someone else.
|
||||||
|
|
||||||
|
## instruction set
|
||||||
|
|
||||||
|
x86-64 has a *gigantic* instruction set. The manual for it is over 2,000 pages
|
||||||
|
long! So it makes sense to select only a small subset of it to use.
|
||||||
|
|
||||||
|
Here are all the instructions we'll be using. If you're not familiar with
|
||||||
|
x86-64 assembly, you might want to look over these (but you don't need to understand everything).
|
||||||
|
|
||||||
|
In the table below, `IMM64` means a 64-bit *immediate* (a constant number).
|
||||||
|
`rdx:rax` refers to the 128-bit number you get by combining `rdx` (upper 64 bits) and `rax` (lower 64 bits).
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────────────┬───────────────────┬────────────────────────────────────────┐
|
||||||
|
│ Instruction │ Encoding │ Description │
|
||||||
|
├──────────────────────┼───────────────────┼────────────────────────────────────────┤
|
||||||
|
│ mov rax, IMM64 │ 48 b8 IMM64 │ set rax to the 64-bit value IMM64 │
|
||||||
|
│ xor eax, eax │ 31 c0 │ set rax to 0 (shorter than mov rax, 0) │
|
||||||
|
│ xor edx, edx │ 31 d2 │ set rdx to 0 │
|
||||||
|
│ mov RDEST, RSRC │ 48 89 (DEST|SRC<<3|0xc0) │ set register DEST to current │
|
||||||
|
│ │ │ value of register SRC │
|
||||||
|
│ mov r8, rax │ 49 89 c0 │ set r8 to rax (only used for syscalls) │
|
||||||
|
│ mov r9, rax │ 49 89 c1 │ set r9 to rax (only used for syscalls) │
|
||||||
|
│ mov r10, rax │ 49 89 c2 │ set r10 to rax (only used for syscalls)│
|
||||||
|
│ xchg rax, rbx │ 48 93 │ exchange the values of rax and rbx │
|
||||||
|
│ mov [rbx], rax │ 48 89 03 │ store rax as 8 bytes at address rbx │
|
||||||
|
│ mov rax, [rbx] │ 48 8b 03 │ load 8 bytes from address rbx into rax │
|
||||||
|
│ mov [rbx], eax │ 89 03 │ store eax as 4 bytes at address rbx │
|
||||||
|
│ mov eax, [rbx] │ 8b 03 │ load 4 bytes from address rbx into eax │
|
||||||
|
│ mov [rbx], ax │ 66 89 03 │ store ax as 2 bytes at address rbx │
|
||||||
|
│ mov ax, [rbx] │ 66 8b 03 │ load 2 bytes from address rbx into ax │
|
||||||
|
│ mov [rbx], al │ 88 03 │ store al as 1 byte at address rbx │
|
||||||
|
│ mov al, [rbx] │ 8a 03 │ load 1 byte from address rbx into al │
|
||||||
|
│ mov rax, [rbp+IMM32] │ 48 8b 85 IMM32 │ load 8 bytes from address rbp+IMM32 │
|
||||||
|
│ │ │ into rax (note: IMM32 may be negative) │
|
||||||
|
│ lea rax, [rbp+IMM32] │ 48 8d 85 IMM32 │ set rax to rbp+IMM32 │
|
||||||
|
│ lea rsp, [rbp+IMM32] │ 48 8d a5 IMM32 │ set rsp to rbp+IMM32 │
|
||||||
|
│ mov [rbp+IMM32], rax │ 48 89 85 IMM32 │ store rax in 8 bytes at rbp+IMM32 │
|
||||||
|
│ mov [rsp+IMM32], rax │ 48 89 84 24 IMM32 │ store rax in 8 bytes at rsp+IMM32 │
|
||||||
|
│ mov [rsp], rbp │ 48 89 2c 24 │ store rbp in 8 bytes at rsp │
|
||||||
|
│ mov rbp, [rsp] │ 48 8b 2c 24 │ load 8 bytes from rsp into rbp │
|
||||||
|
│ neg rax │ 48 f7 d8 │ set rax to -rax │
|
||||||
|
│ add rax, rbx │ 48 01 d8 │ add rbx to rax │
|
||||||
|
│ sub rax, rbx │ 48 29 d8 │ subtract rbx from rax │
|
||||||
|
│ imul rbx │ 48 f7 eb │ set rdx:rax to rax * rbx (signed) │
|
||||||
|
│ idiv rbx │ 48 f7 fb │ divide rdx:rax by rbx (signed); put │
|
||||||
|
│ │ │ quotient in rax, remainder in rdx │
|
||||||
|
│ mul rbx │ 48 f7 e3 │ like imul, but unsigned │
|
||||||
|
│ div rbx │ 48 f7 f3 │ like idiv, but with unsigned division │
|
||||||
|
│ not rax │ 48 f7 d0 │ set rax to ~rax (bitwise not) │
|
||||||
|
│ and rax, rbx │ 48 21 d8 │ set rax to rax & rbx (bitwise and) │
|
||||||
|
│ or rax, rbx │ 48 09 d8 │ set rax to rax | rbx (bitwise or) │
|
||||||
|
│ xor rax, rbx │ 48 31 d8 │ set rax to rax ^ rbx (bitwise xor) │
|
||||||
|
│ shl rax, cl │ 48 d3 e0 │ set rax to rax << cl (left shift) │
|
||||||
|
│ shl rax, IMM8 │ 48 c1 e0 IMM8 │ set rax to rax << IMM8 │
|
||||||
|
│ shr rax, cl │ 48 d3 e8 │ set rax to rax >> cl (zero-extend) │
|
||||||
|
│ shr rax, IMM8 │ 48 c1 e8 IMM8 │ set rax to rax >> IMM8 (zero-extend) │
|
||||||
|
│ sar rax, cl │ 48 d3 f8 │ set rax to rax >> cl (sign-extend) │
|
||||||
|
│ sar rax, IMM8 │ 48 c1 f8 IMM8 │ set rax to rax >> IMM8 (sign-extend) │
|
||||||
|
│ sub rsp, IMM32 │ 48 81 ec IMM32 │ subtract IMM32 from rsp │
|
||||||
|
│ add rsp, IMM32 │ 48 81 c4 IMM32 │ add IMM32 to rsp │
|
||||||
|
│ cmp rax, rbx │ 48 39 d8 │ compare rax with rbx (see je, jl, etc.)│
|
||||||
|
│ test rax, rax │ 48 85 c0 │ equivalent to cmp rax, 0 │
|
||||||
|
│ jmp IMM32 │ e9 IMM32 │ jump to offset IMM32 from here │
|
||||||
|
│ je IMM32 │ 0f 84 IMM32 │ jump to offset IMM32 if equal │
|
||||||
|
│ jne IMM32 │ 0f 85 IMM32 │ jump if not equal │
|
||||||
|
│ jl IMM32 │ 0f 8c IMM32 │ jump if less than │
|
||||||
|
│ jg IMM32 │ 0f 8f IMM32 │ jump if greater than │
|
||||||
|
│ jle IMM32 │ 0f 8e IMM32 │ jump if less than or equal to │
|
||||||
|
│ jge IMM32 │ 0f 8d IMM32 │ jump if greater than or equal to │
|
||||||
|
│ jb IMM32 │ 0f 82 IMM32 │ jump if "below" (like jl but unsigned) │
|
||||||
|
│ ja IMM32 │ 0f 87 IMM32 │ jump if "above" (like jg but unsigned) │
|
||||||
|
│ jbe IMM32 │ 0f 86 IMM32 │ jump if below or equal to │
|
||||||
|
│ jae IMM32 │ 0f 83 IMM32 │ jump if above or equal to │
|
||||||
|
│ call rax │ ff d0 │ call the function at address rax │
|
||||||
|
│ ret │ c3 │ return from function │
|
||||||
|
│ syscall │ 0f 05 │ execute a system call │
|
||||||
|
│ nop │ 90 │ do nothing │
|
||||||
|
└──────────────────────┴───────────────────┴────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
More will be added in the future as needed.
|
||||||
|
|
||||||
## license
|
## license
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
136
instructions.txt
136
instructions.txt
|
@ -1,136 +0,0 @@
|
||||||
Linux syscall calling convention:
|
|
||||||
rax - syscall number
|
|
||||||
rdi, rsi, rdx, r10, r8, r9 - arguments
|
|
||||||
return value placed in rax
|
|
||||||
|
|
||||||
Instruction set:
|
|
||||||
|
|
||||||
mov rax, imm64
|
|
||||||
>48 b8 IMM64
|
|
||||||
xor eax, eax (sets rax to 0, much shorter than mov rax, 0)
|
|
||||||
>31 c0
|
|
||||||
xor edx, edx
|
|
||||||
>31 d2
|
|
||||||
mov rdest, rsrc
|
|
||||||
ax bx cx dx sp bp si di
|
|
||||||
0 3 1 2 4 5 6 7
|
|
||||||
>48 89 (dest | src << 3 | 0xc0)
|
|
||||||
mov r8, rax (for syscalls)
|
|
||||||
>49 89 c0
|
|
||||||
mov r9, rax (for syscalls)
|
|
||||||
>49 89 c1
|
|
||||||
mov r10, rax (for syscalls)
|
|
||||||
>49 89 c2
|
|
||||||
xchg rax, rbx
|
|
||||||
>48 93
|
|
||||||
mov qword [rbx], rax
|
|
||||||
>48 89 03
|
|
||||||
mov rax, qword [rbx]
|
|
||||||
>48 8b 03
|
|
||||||
mov dword [rbx], eax
|
|
||||||
>89 03
|
|
||||||
mov eax, dword [rbx]
|
|
||||||
>8b 03
|
|
||||||
mov word [rbx], ax
|
|
||||||
>66 89 03
|
|
||||||
mov ax, word [rbx]
|
|
||||||
>66 8b 03
|
|
||||||
mov byte [rbx], al
|
|
||||||
>88 03
|
|
||||||
mov al, byte [rbx]
|
|
||||||
>8a 03
|
|
||||||
mov rax, qword [rbp+imm32]
|
|
||||||
>48 8b 85 IMM32 (note: imm may be negative)
|
|
||||||
lea rax, [rbp+imm32]
|
|
||||||
>48 8d 85 IMM32 (note: imm may be negative)
|
|
||||||
lea rsp, [rbp+imm32]
|
|
||||||
>48 8d a5 IMM32 (note: imm may be negative)
|
|
||||||
mov qword [rbp+imm32], rax
|
|
||||||
>48 89 85 IMM32 (note: imm may be negative)
|
|
||||||
mov qword [rsp+imm32], rax
|
|
||||||
>48 89 84 24 IMM32 (note: imm may be negative)
|
|
||||||
mov qword [rsp], rbp
|
|
||||||
>48 89 2c 24
|
|
||||||
mov rbp, qword [rsp]
|
|
||||||
>48 8b 2c 24
|
|
||||||
mov ebx, imm32
|
|
||||||
>bb IMM32
|
|
||||||
neg rax
|
|
||||||
>48 f7 d8
|
|
||||||
add rax, rbx
|
|
||||||
>48 01 d8
|
|
||||||
sub rax, rbx
|
|
||||||
>48 29 d8
|
|
||||||
imul rbx
|
|
||||||
>48 f7 eb
|
|
||||||
idiv rbx
|
|
||||||
>48 f7 fb
|
|
||||||
mul rbx
|
|
||||||
>48 f7 e3
|
|
||||||
div rbx
|
|
||||||
>48 f7 f3
|
|
||||||
not rax
|
|
||||||
>48 f7 d0
|
|
||||||
and rax, rbx
|
|
||||||
>48 21 d8
|
|
||||||
or rax, rbx
|
|
||||||
>48 09 d8
|
|
||||||
xor rax, rbx
|
|
||||||
>48 31 d8
|
|
||||||
shl rax, cl
|
|
||||||
>48 d3 e0
|
|
||||||
shl rax, imm8
|
|
||||||
>48 c1 e0 IMM8
|
|
||||||
shr rax, cl
|
|
||||||
>48 d3 e8
|
|
||||||
shr rax, imm8
|
|
||||||
>48 c1 e8 IMM8
|
|
||||||
sar rax, cl
|
|
||||||
>48 d3 f8
|
|
||||||
sar rax, imm8
|
|
||||||
>48 c1 f8 IMM8
|
|
||||||
sub rsp, imm32
|
|
||||||
>48 81 ec IMM32
|
|
||||||
add rsp, imm32
|
|
||||||
>48 81 c4 IMM32
|
|
||||||
cmp rax, rbx
|
|
||||||
>48 39 d8
|
|
||||||
test rax, rax
|
|
||||||
>48 85 c0
|
|
||||||
jmp rel32
|
|
||||||
>e9 REL32
|
|
||||||
je rel32
|
|
||||||
>0f 84 REL32
|
|
||||||
jne rel32
|
|
||||||
>0f 85 REL32
|
|
||||||
jl rel32
|
|
||||||
>0f 8c REL32
|
|
||||||
jg rel32
|
|
||||||
>0f 8f REL32
|
|
||||||
jle rel32
|
|
||||||
>0f 8e REL32
|
|
||||||
jge rel32
|
|
||||||
>0f 8d REL32
|
|
||||||
jb rel32
|
|
||||||
>0f 82 REL32
|
|
||||||
ja rel32
|
|
||||||
>0f 87 REL32
|
|
||||||
jbe rel32
|
|
||||||
>0f 86 REL32
|
|
||||||
jae rel32
|
|
||||||
>0f 83 REL32
|
|
||||||
call rax
|
|
||||||
>ff d0
|
|
||||||
ret
|
|
||||||
>c3
|
|
||||||
syscall
|
|
||||||
>0f 05
|
|
||||||
nop
|
|
||||||
>90
|
|
||||||
(more will be added as needed)
|
|
||||||
|
|
||||||
to be removed:
|
|
||||||
mov qword [rsp], rax
|
|
||||||
>48 89 04 24
|
|
||||||
mov rax, qword [rsp]
|
|
||||||
>48 8b 04 24
|
|
Loading…
Add table
Add a link
Reference in a new issue