From 787cb6dff2984ec76c738a27d6fb5265750433ee Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Sun, 14 Jul 2024 22:54:52 -0400 Subject: [PATCH] checkpoint commit --- SForth.dmp | 808 +++++++++++++++++++++++++++-------------------------- asm.rb | 174 ++++++++++++ asm.s | 3 + dis.sh | 2 +- test.c | 38 ++- 5 files changed, 617 insertions(+), 408 deletions(-) create mode 100755 asm.rb create mode 100644 asm.s diff --git a/SForth.dmp b/SForth.dmp index 03e5bfe..a79e5ec 100644 --- a/SForth.dmp +++ b/SForth.dmp @@ -136,405 +136,409 @@ # in the argument. # # MACHINE CODE ########## INTENTION ############ 78 INSTRUCTION ####### OPCODE ######## ModR/M #### SIB ###### -BE B2 00 40 00 #:rsi(input) = 004000__ mov r32, imm32 B8+rd id -BF 30 00 00 10 # rdi(output) = 10000030 mov r32, imm32 B8+rd id - -######################### binary interpreter >>> 82 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -E8 02 00 00 00 #+call (bi) call rel32 -EB F9 #-jump bi 89 jmp rel8 EB cb -# # # # # # # # # # # # # (bi) 89 -AC # al = [rsi++] lods m8 AC -3C 99 # cmp al, 99(command) cmp al, imm8 3C ib -74 02 #+jump _command if == 8E je rel8 74 cb -AA # [rdi++] = al (xmit) stos m8 AA -C3 # return ret C3 -# _command: # 90 -BA 28 00 00 10 # rdx = Latest mov r32, imm32 B8+rd id -AC # al = [rsi++] (argument) lods m8 AC -A8 60 # al & 60(graphic)? test al, imm8 A8 ib -74 31 #+jump Head if zero 9A jz rel8 74 cb -48 8B 1A # rbx = [rdx] mov r64, r/m64 REX.W 8B /r 00 011 010 -# _find1: # 9D -50 # push al push r64 50+rd -24 7F # al &= 7F and al, imm8 24 ib -3A 43 11 # cmp al, [rbx+11] cmp r8, r/m8 REX 3A /r 01 000 011 -58 # pop al pop r64 58+rd -74 06 #+jump _match if == A6 je rel8 74 cb -48 8B 5B 08 # rbx = [rbx+8] mov r64, r/m64 REX.W 8B /r 01 011 011 -EB F1 #-jump _find1 AC jmp rel8 EB cb -# _match: # AC -A8 80 # al & 80(exec) ? 
test al, imm8 A8 ib -74 09 #+jump COMPL if zero B0 jz rel8 74 cb -FF 23 # jump [rbx] (exec) B2 jmp r/m64 REX FF /4 00 100 011 - -######################### Interpreter subroutines ################################################ - -99 05 43 4F 4D 50 4C #### COMPL Forth's COMPILE, B9 ( ebx=xt -- ) -B0 FF AA # compile >>>>>>>>>>>>>>>>> call r/m64 FF /2 00 010 100 00 100 101 -B0 14 AA # al = _ mov r8, imm8 B0+rb ib -B0 25 AA # [rdi++] = al stos m8 AA -93 # eax = ebx xchg eax, r32 90+rd -AB # [rdi(++4)] = eax stos m32 AB -C3 # return ret C3 - -99 04 48 65 61 64 ####### Head ================= CB ( al=flag rdx=Latest rsi=addr -- rdx=Latest rsi=addr' ) -48 83 C7 0F # rdi += 0F add r/m64, imm8 REX.W 83 /0 ib 11 000 111 -48 83 E7 F0 # rdi &= F0 and r/m64, imm8 REX.W 83 /4 ib 11 100 111 -48 8B 0A # rcx = [rdx] mov r64, r/m64 REX.W 8B /r 00 001 010 -48 89 4F 08 # [rdi+8] = rcx mov r/m64, r64 REX.W 89 /r 01 001 111 -48 89 3A # [rdx] = rdi mov r/m64, r64 REX.W 89 /r 00 111 010 -48 83 C7 10 # rdi += 10 add r/m64, imm8 REX.W 83 /0 ib 11 000 111 -AA # [rdi++] = al stos m8 AA -91 # ecx = eax xchg eax, r32 90+rd -83 E1 1F # ecx &= 1F and r/m32, imm8 83 /4 ib 11 100 001 -F3 A4 # copy Name rep movs m8, m8 F3 A4 -48 8B 0A # rcx = [rdx] mov r64, r/m64 REX.W 8B /r 00 001 010 -48 89 39 # [rcx] = rdi mov r/m64, r64 REX.W 89 /r 00 111 001 -C3 # return ret C3 - -# ============= DICTIONARY FORMAT -# -# Each SmithForth dictionary entry begins with: -# (8 bytes) Code -# (8 bytes) Link -# (1 byte) Flag (3 bits) and Length (5 bits) of Name -# (Length bytes) Name, where Length < 2^5. -# Each subroutine call refers to its callee. 
See argument ZZ in the following example: -# -# WW WW WW WW WW WW WW WW # Code: address of a subroutine (usually right after Name) -# XX XX XX XX XX XX XX XX # Link: address of the next earlier dictionary entry -# YY # Flag: 80=IMMEDIATE, 40=HIDDEN ; Name Length -# 2E 53 # Name: .S ( -- ) show the values on the data stack -# 4D 89 7F F8 # [r15-8] = r15 (obuf) mov r/m64, r64 REX.W 89 /r 01 111 111 -# 49 C7 47 F0 00 00 00 10 # [r15-10] = 10000000 (len) mov r/m64, imm32 REX.W C7 /0 id 01 000 111 -# 4D 29 7F F0 # [r15-10] -= r15 sub r/m64, r64 REX.W 29 /r 01 111 111 -# 49 83 EF 10 # r15 -= 2 cells sub r/m64, imm8 REX.W 83 /5 ib 11 101 111 -# FF 14 25 ZZ ZZ ZZ ZZ # call TYPE call r/m64 FF /2 00 010 100 00 100 101 -# C3 # return ret C3 - -99 03 42 59 45 ########## BYE ( -- ) ============================================================= -6A 3C 58 # rax = exit (no return) push imm8; pop 6A ib ; 58+rd -31 FF # rdi = stat xor r/m32, r32 31 /r 11 111 111 -0F 05 # syscall syscall 0F 05 - -# 99 C2 # BYE - -# Linux syscall: ( RDI RSI RDX R10 R8 R9 RAX=syscall# -- RAX=stat RCX=? R11=? ) -# Manual pages on system calls: `man 2 syscalls ; man 2 exit ; man 2 read ; man 2 write ; man 2 mmap` -# syscall numbers: /usr/include/x86_64-linux-gnu/asm/unistd_64.h -# syscall error numbers: /usr/include/asm-generic/errno-base.h -# mmap flag values: /usr/include/asm-generic/mman-common.h - -99 04 54 59 50 45 ####### TYPE ( rsi=addr rdx=u -- rsi=? rdi=? 
) show memory [addr, addr+u) ====== -6A 01 5F # rdi(fd) = stdout = 1 push imm8; pop 6A ib ; 58+rd -# _beg: # 00 -8B C7 # rax = write = 1 = rdi mov r32, r/m32 8B /r 11 000 111 -0F 05 # syscall syscall 0F 05 -48 85 C0 # cmp rax, 0 test r/m64, r64 REX.W 85 /r 11 000 000 -7C 08 #+jump _end if < 09 jl rel8 7C cb -48 01 C6 # rsi(buf) += rax add r/m64, r64 REX.W 01 /r 11 000 110 -48 29 C2 # rdx(cnt) -= rax sub r/m64, r64 REX.W 29 /r 11 000 010 -7F EF #-jump _beg if > 11 jg rel8 7F cb -# _end: # 11 -C3 # return ret C3 - -# ============= DEBUGGING -# -# During development, a program like this one may crash with an uninformative error message like -# "Segmentation fault" or "Illegal instruction." How can we work in such an environment? -# We start with a trivial program that works (i.e., simply invokes syscall exit, as in BYE), -# and then expand it gradually until it does what we want. When a program breaks after a small -# change, we know where the bug is. Here is one way to go. -# -# Insert a jump to BYE at the top of the program. You have to compute the length of the jump. -# After this chore, updating it is easy if you expand the program only one instruction at a time. -# You will want to disable and enable parts of the program as you expand it. The most basic ways: -# -- Hide unwanted code in comments. If this disrupts byte counts, replace lost bytes by no-op -# instructions NOP = 90. -# -- Inside a subroutine, leave early by inserting a return instruction RET = C3. 
- -99 03 64 62 67 ########## dbg ( -- ) show stack and data; use `./SForth | xxd -o 0x0fffffe0` ===== -56 57 # push rsi, rdi push r64 50+rd -BE E0 FF FF 0F # rsi = addr mov r32, imm32 B8+rd id -BA 00 0A 00 00 # rdx = u mov r32, imm32 B8+rd id -99 54 # Call TYPE -5F 5E # pop rdi, rsi pop r64 58+rd -C3 # return ret C3 - -# 99 E4 99 C2 # dbg BYE - -99 03 72 65 67 ########## reg ( -- ) show registers; use `./SForth | xxd` ======================== -56 57 # push rsi, rdi push r64 50+rd -41 57 57 41 56 56 # push r15, rdi, r14, rsi push r64 REX 50+rd -41 55 55 41 54 54 # push r13, rbp, r12, rsp push r64 REX 50+rd -41 53 53 41 52 52 # push r11, rbx, r10, rdx push r64 REX 50+rd -41 51 51 41 50 50 # push r9 , rcx, r8 , rax push r64 REX 50+rd -48 8B F4 # rsi = rsp mov r64, r/m64 REX.W 8B /r 11 110 100 -BA 80 00 00 00 # rdx = u mov r32, imm32 B8+rd id -99 54 # Call TYPE -48 83 EC 80 # rsp -= -80 sub r/m64, imm8 REX.W 83 /5 ib 11 101 100 -5F 5E # pop rdi, rsi pop r64 58+rd -C3 # return ret C3 - -# 99 F2 99 C2 # reg BYE - -# ============= TEXT INTERPRETER -# -# Standard Forth handles input one line at a time. -# SmithForth's text interpreter is a simple interpreter in the standard Forth style. -# SVAL (see standard Forth's EVALUATE) interprets each line. -# REFILL fetches a line of input, including its trailing LF, and sets the input source state. -# 10000000 #IN cell contains #characters in the current line. -# 10000008 TIB cell contains the address where the current line begins. -# 10000010 >IN cell contains #characters in the current line that have been parsed. -# 10000020 STATE cell contains 0(Interpreting) or 1(Compiling). -# 10000028 Latest cell contains the execution token (xt) of the latest defined Forth word. -# In Forth, to parse is to remove from the input stream. As a line is parsed, [>IN] increases from 0 to [#IN]. -# Forth's "parse area" is the part of the line not yet parsed. 
- -99 06 52 45 46 49 4C 4C # REFILL ( -- ) ========================================================== -49 C7 C1 00 00 00 10 # r9 = VAR mov r/m64, imm32 REX.W C7 /0 id 11 000 001 -49 8B 01 # rax = [#IN] mov r64, r/m64 REX.W 8B /r 00 000 001 -49 01 41 08 # [TIB] += rax add r/m64, r64 REX.W 01 /r 01 000 001 -49 83 21 00 # [#IN] = 0 and r/m64, imm8 REX.W 83 /4 ib 00 100 001 -49 83 61 10 00 # [>IN] = 0 and r/m64, imm8 REX.W 83 /4 ib 01 100 001 -# _beg: # 00 -49 FF 01 # [#IN]++ inc r/m64 REX.W FF /0 00 000 001 -49 8B 41 08 # rax = [TIB] mov r64, r/m64 REX.W 8B /r 01 000 001 -49 03 01 # rax += [#IN] add r64, r/m64 REX.W 03 /r 00 000 001 -80 78 FF 0A # cmp [rax-1], LF cmp r/m8, imm8 80 /7 ib 01 111 000 -75 F0 #-jump _beg if != 10 jne rel8 75 cb -C3 # return ret C3 - -99 04 73 65 65 6B ####### seek ( cl dl "ccc" -- eflags ) parse until 1st char of parse area is within [cl, dl) or parse area is empty -49 C7 C1 00 00 00 10 # r9 = VAR mov r/m64, imm32 REX.W C7 /0 id 11 000 001 -2A D1 # dl -= cl sub r8, r/m8 2A /r 11 010 001 -# _beg: # 00 like WITHIN ( al cl dl -- eflags ) -49 8B 41 10 # rax = [>IN] mov r64, r/m64 REX.W 8B /r 01 000 001 -49 3B 01 # cmp rax, [#IN] cmp r64, r/m64 REX.W 3B /r 00 000 001 -73 16 #+jump _end if U>= 09 jae rel8 73 cb -49 8B 41 08 # rax = [TIB] mov r64, r/m64 REX.W 8B /r 01 000 001 -49 03 41 10 # rax += [>IN] add r64, r/m64 REX.W 03 /r 01 000 001 -8A 00 # al = [rax] mov r8, r/m8 8A /r 00 000 000 -2A C1 # al -= cl sub r8, r/m8 2A /r 11 000 001 -3A C2 # cmp al, dl cmp r8, r/m8 3A /r 11 000 010 -72 06 #+jump _end if U< 19 jb rel8 72 cb -49 FF 41 10 # [>IN]++ inc r/m64 REX.W FF /0 01 000 001 -EB E1 #-jump _beg 1F jmp rel8 EB cb -# _end: # 1F -C3 # return ret C3 - -99 05 50 41 52 53 45 #### PARSE ( cl dl "ccc" -- rbp=addr rax=u ) addr: where ccc begins ; u: length of ccc -49 C7 C1 00 00 00 10 # r9 = VAR mov r/m64, imm32 REX.W C7 /0 id 11 000 001 -49 8B 69 10 # rbp = [>IN] mov r64, r/m64 REX.W 8B /r 01 101 001 -99 73 # Call seek parse until 1st instance 
within [cl, dl) is parsed or parse area empty -49 8B 41 10 # rax = [>IN] mov r64, r/m64 REX.W 8B /r 01 000 001 -73 04 #+jump _end if U>= 00 jae rel8 73 cb -49 FF 41 10 # [>IN]++ inc r/m64 REX.W FF /0 01 000 001 -# _end: # 04 -48 29 E8 # rax -= rbp sub r/m64, r64 REX.W 29 /r 11 101 000 -49 03 69 08 # rbp += [TIB] add r64, r/m64 REX.W 03 /r 01 101 001 -C3 # return ret C3 - -99 05 70 6E 61 6D 65 #### pname ( "ccc" -- rbp=addr rax=u ) PARSE-NAME ============ -B1 21 B2 7F # (cl, dl) = (BL+1, ...) mov r8, imm8 B0+rb ib -99 73 # Call seek -B1 7F B2 21 # (cl, dl) = (..., BL+1) mov r8, imm8 B0+rb ib -99 50 # Call PARSE -C3 # return ret C3 - -99 81 5B ################ [ ( -- ) lbracket IMMEDIATE ============================================ -6A 00 # push 0(Interpreting) push imm8 6A ib -8F 04 25 20 00 00 10 # pop [STATE] pop r/m64 8F /0 00 000 100 00 100 101 -C3 # return ret C3 - -99 01 5D ################ ] ( -- ) rbracket ====================================================== -6A 01 # push 1(Compiling) push imm8 6A ib -8F 04 25 20 00 00 10 # pop [STATE] pop r/m64 8F /0 00 000 100 00 100 101 -C3 # return ret C3 - -99 81 5C ################ \ ( "ccc" -- ) backslash IMMEDIATE ================================ -48 8B 04 25 00 00 00 10 # rax = [#IN] mov r64, r/m64 REX.W 8B /r 00 000 100 00 100 101 -48 89 04 25 10 00 00 10 # [>IN] = rax mov r/m64, r64 REX.W 89 /r 00 000 100 00 100 101 -C3 # return ret C3 - -99 81 28 ################ ( ( "ccc" -- ) lparen IMMEDIATE ================================ -B1 29 B2 2A # (cl, dl) = (RP, RP+1) mov r8, imm8 B0+rb ib -99 50 # Call PARSE Forth 2012 implies comment ends at rparen or newline. 
-C3 # return ret C3 - -99 01 3A ################ : ( "ccc" -- ) colon ==================================== -99 70 # Call pname See Forth 2012 Table 2.1 -48 89 EE # rsi = rbp mov r/m64, r64 REX.W 89 /r 11 101 110 -BA 28 00 00 10 # rdx = Latest mov r32, imm32 B8+rd id -99 48 # Call Head -48 8B 0A # rcx = [rdx] mov r64, r/m64 REX.W 8B /r 00 001 010 -48 83 C1 10 # rcx += 10 add r/m64, imm8 REX.W 83 /0 ib 11 000 001 -80 09 40 # [rcx] |= 40(HIDDEN) or r/m8, imm8 80 /1 ib 00 001 001 -99 5D # Call ] -C3 # return ret C3 - -99 81 3B ################ ; ( C: -- ) semicolon IMMEDIATE ======================================== -B0 C3 # al = opcode ret mov r8, imm8 B0+rb ib -AA # [rdi++] = al stos m8 AA -48 8B 0C 25 28 00 00 10 # rcx = [Latest] mov r64, r/m64 REX.W 8B /r 00 001 100 00 100 101 -48 83 C1 10 # rcx += 10 add r/m64, imm8 REX.W 83 /0 ib 11 000 001 -80 21 BF # [rcx] &= BF(~HIDDEN) and r/m8, imm8 80 /4 ib 00 100 001 -99 5B # Call [ -C3 # return ret C3 - -99 01 2E ################ . ( char -- ) nonstandard name for C, ================================== -41 8A 07 # al = [r15] mov r8, r/m8 REX 8A /r 00 000 111 -49 83 C7 08 # r15 += 8 add r/m64, imm8 REX.W 83 /0 ib 11 000 111 -AA # [rdi++] = al stos m8 AA -C3 # return ret C3 - -99 83 4C 49 54 ########## LIT ( C: x -- ) ( -- x ) IMMEDIATE ===================================== TODO compare xchg r15, rsp ; push imm8 ; xchg r15, rsp -B8 49 83 EF 08 AB # compile r15 -= 8 sub r/m64, imm8 REX.W 83 /5 ib 11 101 111 -B8 6A 41 8F 07 AA # eax = push x ; pop [r15] push i8 ; pop r/m64 6A ib;REX 8F /0 00 000 111 -41 8A 07 AB # al = [r15] ; compile mov r8, r/m8 REX 8A /r 00 000 111 -49 83 C7 08 # r15 += 8 add r/m64, imm8 REX.W 83 /0 ib 11 000 111 -C3 # return ret C3 - -99 03 78 74 3D ########## xt= ( rbp=addr rax=u rbx=xt -- rbx=xt rax=? rdi=? eflags ) rbx == 0 or unhidden and matches -48 85 DB # rbx(xt) ? 
test r/m64, r64 REX.W 85 /r 11 011 011 -75 01 #+jump _nonzero if != 0 jnz rel8 75 cb -C3 # return ret C3 -# _nonzero: # -48 8B C8 # rcx = rax(u) mov r64, r/m64 REX.W 8B /r 11 001 000 -48 8D 73 10 # rsi = rbx(xt) + 10 lea r64, m REX.W 8D /r 01 110 011 -AC # al = [rsi++] lods m8 AC -A8 40 # al & 40(HIDDEN) ? test al, imm8 A8 ib -74 01 #+jump _unhidden if == 0 jz rel8 74 cb -C3 # return ret C3 -# _unhidden: # -48 83 E0 1F # rax &= 1F(Length) and r/m64, imm8 REX.W 83 /4 ib 11 100 000 -48 39 C8 # cmp rax, rcx cmp r/m64, r64 REX.W 39 /r 11 001 000 -74 01 #+jump _lengthEq if == je rel8 74 cb -C3 # return ret C3 -# _lengthEq: # -48 8B FD # rdi = rbp mov r64, r/m64 REX.W 8B /r 11 111 101 -F3 A6 # strings equal ? repe cmps m8, m8 F3 A6 -C3 # return ret C3 - -99 04 46 49 4E 44 ####### FIND ( rbp=addr rax=u -- rbp=addr rax=u rbx=xt ) xt==0 if not found ==== -48 8B 1C 25 28 00 00 10 # rbx = [Latest] mov r64, r/m64 REX.W 8B /r 00 011 100 00 100 101 -# _beg: # -E8 03 00 00 00 #+call (FIND) call rel32 E8 cd -75 F9 #-jump _beg if != jne rel8 75 cb -C3 # return ret C3 -# # # # # # # # # # # # # (FIND) -50 57 # push rax, rdi push r64 50+rd -99 78 # Call xt= -5F 58 # pop rdi, rax pop r64 58+rd -74 04 #+jump _end if == je rel8 74 cb -48 8B 5B 08 # rbx = [rbx+8] mov r64, r/m64 REX.W 8B /r 01 011 011 -# _end: # -C3 # return ret C3 - -99 03 4E 75 6D ########## Num ( rbp=addr rax=u -- n ) ============================================ -49 83 EF 08 # r15 -= 8 sub r/m64, imm8 REX.W 83 /5 ib 11 101 111 -49 83 27 00 # [r15] = 0 and r/m64, imm8 REX.W 83 /4 ib 00 100 111 -48 89 C1 # rcx = rax mov r/m64, r64 REX.W 89 /r 11 000 001 -48 8B F5 # rsi = rbp mov r64, r/m64 REX.W 8B /r 11 110 101 -# _beg: # -E8 03 00 00 00 #+call (Num) call rel32 E8 cd -E2 F9 #-jump beg if --rcx loop rel8 E2 cb -C3 # return ret C3 -# # # # # # # # # # # # # (Num) -AC # al = [rsi++] lods m8 AC -3C 41 # cmp al, 'A' cmp al, imm8 3C ib -7C 02 #+jump _digit if < jl rel8 7C cb -# _letter: # -2C 07 # al -= 7 sub al, imm8 2C ib 
-# _digit: # -2C 30 # al -= 30 sub al, imm8 2C ib -49 C1 27 04 # [r15] <<= 4 sal r/m64, imm8 REX.W C1 /4 ib 00 100 111 -49 09 07 # [r15] |= rax or r/m64, r64 REX.W 09 /r 00 000 111 -C3 # return ret C3 - -99 04 6D 69 73 73 ####### miss ( rbp=addr rax=u rbx=xt -- |n rbx=xt ) n present iff u nonzero ==== -48 85 DB # rbx(xt) ? test r/m64, r64 REX.W 85 /r 11 011 011 -74 01 #+jump (miss) if == 0 jz rel8 74 cb -C3 # return ret C3 -# # # # # # # # # # # # # (miss) -48 85 C0 # rax(u) ? test r/m64, r64 REX.W 85 /r 11 000 000 -75 01 #+jump _nonempty if != jne rel8 75 cb -C3 # return ret C3 -# _nonempty: # -99 4E # Call Num -F6 04 25 20 00 00 10 01 # [STATE] ? test r/m8, imm8 F6 /0 ib 00 000 100 00 100 101 -75 01 #+jump _lit if != 0 jnz rel8 75 cb -C3 # return ret C3 -# _lit: # -99 4C # Call LIT -C3 # return ret C3 - -99 04 45 58 45 43 ####### EXEC ( rbx=xt -- ) ===================================================== -B9 F8 FF FF 7F # rcx = _ mov r32, imm32 B8+rd id -57 # push rdi push r64 50+rd -89 CF # rdi = rcx mov r/m32, r32 89 /r 11 001 111 -99 43 # Call COMPL -B0 C3 # al = C3 mov r8, imm8 B0+rb ib -AA # [rdi++] = al stos m8 AA -5F # pop rdi pop r64 58+rd -FF D1 # call rcx call r/m64 FF /2 11 010 001 -C3 # return ret C3 - -99 04 65 78 65 63 ####### exec ( al rbx=xt -- ) iff al != 1 ====================================== -3C 01 # cmp al, 1 cmp al, imm8 3C ib -75 01 #+jump (exec) if != jne rel8 75 cb -C3 # return ret C3 -# # # # # # # # # # # # # (exec) -99 45 # Call EXEC -C3 # return ret C3 - -99 05 63 6F 6D 70 6C #### compl ( al -- al ) iff al == 1 ========================================== -3C 01 # cmp al, 1 cmp al, imm8 3C ib -74 01 #+jump (compl) if == je rel8 74 cb -C3 # return ret C3 -# # # # # # # # # # # # # (compl) -99 43 # Call COMPL -B0 01 # al = 1 mov r8, imm8 B0+rb ib -C3 # return ret C3 - -99 03 68 69 74 ########## hit ( rbx=xt -- ) ====================================================== -48 85 DB # rbx(xt) ? 
test r/m64, r64 REX.W 85 /r 11 011 011 -75 01 #+jump (hit) if != 0 jnz rel8 75 cb -C3 # return ret C3 -# # # # # # # # # # # # # (hit) -40 8A 43 10 # al = [rbx+10] mov r8, r/m8 REX 8A /r 01 000 011 -24 80 # al &= 80(IMMEDIATE) and al, imm8 24 ib -0A 04 25 20 00 00 10 # al |= [STATE] or r8, r/m8 0A /r 00 000 100 00 100 101 -99 63 # Call compl -99 65 # Call exec -C3 # return ret C3 - -99 04 53 56 41 4C ####### SVAL ( i*x -- j*x ) == 00 EVALUATE ===================================== -E8 03 00 00 00 #+call (SVAL) 05 call rel32 E8 cd -7C F9 #-jump SVAL if < 07 jl rel8 7C cb -C3 # return ret C3 -# # # # # # # # # # # # # (SVAL) 08 -99 70 # Call pname -99 46 # Call FIND -99 6D # Call miss -99 68 # Call hit -48 8B 04 25 10 00 00 10 # rax = [>IN] mov r64, r/m64 REX.W 8B /r 00 000 100 00 100 101 -48 3B 04 25 00 00 00 10 # cmp rax, [#IN] cmp r64, r/m64 REX.W 3B /r 00 000 100 00 100 101 -C3 # return ret C3 - -99 02 74 69 ############# ti ( -- ) text interpreter ============================================= -49 C7 C7 00 00 00 10 # r15(stack) = 10000000 mov r/m64, imm32 REX.W C7 /0 id 11 000 111 -49 89 77 08 # [TIB] = rsi mov r/m64, r64 REX.W 89 /r 01 110 111 -99 5B # Call [ -# _beg: # -E8 02 00 00 00 #+call (ti) call rel32 E8 cd -EB F9 #-jump _beg jmp rel8 EB cb -# # # # # # # # # # # # # (ti) -99 52 # Call REFILL -99 53 # Call SVAL -C3 # return ret C3 - -# 99 E4 99 C2 # dbg BYE - -99 F4 # ti +#BE B2 00 40 00 #:rsi(input) = 004000__ mov r32, imm32 B8+rd id +#BF 30 00 00 10 # rdi(output) = 10000030 mov r32, imm32 B8+rd id + +48 c7 c0 3C 00 00 00 # mov $0x3c,%rax +48 31 ff # xor %rdi,%rdi +0f 05 # syscall + +########################## binary interpreter >>> 82 <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +#E8 02 00 00 00 #+call (bi) call rel32 +#EB F9 #-jump bi 89 jmp rel8 EB cb +## # # # # # # # # # # # # (bi) 89 +#AC # al = [rsi++] lods m8 AC +#3C 99 # cmp al, 99(command) cmp al, imm8 3C ib +#74 02 #+jump _command if == 8E je rel8 74 cb +#AA # [rdi++] = al (xmit) stos m8 AA 
+#C3 # return ret C3 +## _command: # 90 +#BA 28 00 00 10 # rdx = Latest mov r32, imm32 B8+rd id +#AC # al = [rsi++] (argument) lods m8 AC +#A8 60 # al & 60(graphic)? test al, imm8 A8 ib +#74 31 #+jump Head if zero 9A jz rel8 74 cb +#48 8B 1A # rbx = [rdx] mov r64, r/m64 REX.W 8B /r 00 011 010 +## _find1: # 9D +#50 # push al push r64 50+rd +#24 7F # al &= 7F and al, imm8 24 ib +#3A 43 11 # cmp al, [rbx+11] cmp r8, r/m8 REX 3A /r 01 000 011 +#58 # pop al pop r64 58+rd +#74 06 #+jump _match if == A6 je rel8 74 cb +#48 8B 5B 08 # rbx = [rbx+8] mov r64, r/m64 REX.W 8B /r 01 011 011 +#EB F1 #-jump _find1 AC jmp rel8 EB cb +## _match: # AC +#A8 80 # al & 80(exec) ? test al, imm8 A8 ib +#74 09 #+jump COMPL if zero B0 jz rel8 74 cb +#FF 23 # jump [rbx] (exec) B2 jmp r/m64 REX FF /4 00 100 011 +# +########################## Interpreter subroutines ################################################ +# +#99 05 43 4F 4D 50 4C #### COMPL Forth's COMPILE, B9 ( ebx=xt -- ) +#B0 FF AA # compile >>>>>>>>>>>>>>>>> call r/m64 FF /2 00 010 100 00 100 101 +#B0 14 AA # al = _ mov r8, imm8 B0+rb ib +#B0 25 AA # [rdi++] = al stos m8 AA +#93 # eax = ebx xchg eax, r32 90+rd +#AB # [rdi(++4)] = eax stos m32 AB +#C3 # return ret C3 +# +#99 04 48 65 61 64 ####### Head ================= CB ( al=flag rdx=Latest rsi=addr -- rdx=Latest rsi=addr' ) +#48 83 C7 0F # rdi += 0F add r/m64, imm8 REX.W 83 /0 ib 11 000 111 +#48 83 E7 F0 # rdi &= F0 and r/m64, imm8 REX.W 83 /4 ib 11 100 111 +#48 8B 0A # rcx = [rdx] mov r64, r/m64 REX.W 8B /r 00 001 010 +#48 89 4F 08 # [rdi+8] = rcx mov r/m64, r64 REX.W 89 /r 01 001 111 +#48 89 3A # [rdx] = rdi mov r/m64, r64 REX.W 89 /r 00 111 010 +#48 83 C7 10 # rdi += 10 add r/m64, imm8 REX.W 83 /0 ib 11 000 111 +#AA # [rdi++] = al stos m8 AA +#91 # ecx = eax xchg eax, r32 90+rd +#83 E1 1F # ecx &= 1F and r/m32, imm8 83 /4 ib 11 100 001 +#F3 A4 # copy Name rep movs m8, m8 F3 A4 +#48 8B 0A # rcx = [rdx] mov r64, r/m64 REX.W 8B /r 00 001 010 +#48 89 39 # [rcx] = rdi mov r/m64, 
r64 REX.W 89 /r 00 111 001 +#C3 # return ret C3 +# +## ============= DICTIONARY FORMAT +## +## Each SmithForth dictionary entry begins with: +## (8 bytes) Code +## (8 bytes) Link +## (1 byte) Flag (3 bits) and Length (5 bits) of Name +## (Length bytes) Name, where Length < 2^5. +## Each subroutine call refers to its callee. See argument ZZ in the following example: +## +## WW WW WW WW WW WW WW WW # Code: address of a subroutine (usually right after Name) +## XX XX XX XX XX XX XX XX # Link: address of the next earlier dictionary entry +## YY # Flag: 80=IMMEDIATE, 40=HIDDEN ; Name Length +## 2E 53 # Name: .S ( -- ) show the values on the data stack +## 4D 89 7F F8 # [r15-8] = r15 (obuf) mov r/m64, r64 REX.W 89 /r 01 111 111 +## 49 C7 47 F0 00 00 00 10 # [r15-10] = 10000000 (len) mov r/m64, imm32 REX.W C7 /0 id 01 000 111 +## 4D 29 7F F0 # [r15-10] -= r15 sub r/m64, r64 REX.W 29 /r 01 111 111 +## 49 83 EF 10 # r15 -= 2 cells sub r/m64, imm8 REX.W 83 /5 ib 11 101 111 +## FF 14 25 ZZ ZZ ZZ ZZ # call TYPE call r/m64 FF /2 00 010 100 00 100 101 +## C3 # return ret C3 +# +#99 03 42 59 45 ########## BYE ( -- ) ============================================================= +#6A 3C 58 # rax = exit (no return) push imm8; pop 6A ib ; 58+rd +#31 FF # rdi = stat xor r/m32, r32 31 /r 11 111 111 +#0F 05 # syscall syscall 0F 05 +# +## 99 C2 # BYE +# +## Linux syscall: ( RDI RSI RDX R10 R8 R9 RAX=syscall# -- RAX=stat RCX=? R11=? ) +## Manual pages on system calls: `man 2 syscalls ; man 2 exit ; man 2 read ; man 2 write ; man 2 mmap` +## syscall numbers: /usr/include/x86_64-linux-gnu/asm/unistd_64.h +## syscall error numbers: /usr/include/asm-generic/errno-base.h +## mmap flag values: /usr/include/asm-generic/mman-common.h +# +#99 04 54 59 50 45 ####### TYPE ( rsi=addr rdx=u -- rsi=? rdi=? 
) show memory [addr, addr+u) ====== +#6A 01 5F # rdi(fd) = stdout = 1 push imm8; pop 6A ib ; 58+rd +## _beg: # 00 +#8B C7 # rax = write = 1 = rdi mov r32, r/m32 8B /r 11 000 111 +#0F 05 # syscall syscall 0F 05 +#48 85 C0 # cmp rax, 0 test r/m64, r64 REX.W 85 /r 11 000 000 +#7C 08 #+jump _end if < 09 jl rel8 7C cb +#48 01 C6 # rsi(buf) += rax add r/m64, r64 REX.W 01 /r 11 000 110 +#48 29 C2 # rdx(cnt) -= rax sub r/m64, r64 REX.W 29 /r 11 000 010 +#7F EF #-jump _beg if > 11 jg rel8 7F cb +## _end: # 11 +#C3 # return ret C3 +# +## ============= DEBUGGING +## +## During development, a program like this one may crash with an uninformative error message like +## "Segmentation fault" or "Illegal instruction." How can we work in such an environment? +## We start with a trivial program that works (i.e., simply invokes syscall exit, as in BYE), +## and then expand it gradually until it does what we want. When a program breaks after a small +## change, we know where the bug is. Here is one way to go. +## +## Insert a jump to BYE at the top of the program. You have to compute the length of the jump. +## After this chore, updating it is easy if you expand the program only one instruction at a time. +## You will want to disable and enable parts of the program as you expand it. The most basic ways: +## -- Hide unwanted code in comments. If this disrupts byte counts, replace lost bytes by no-op +## instructions NOP = 90. +## -- Inside a subroutine, leave early by inserting a return instruction RET = C3. 
+# +#99 03 64 62 67 ########## dbg ( -- ) show stack and data; use `./SForth | xxd -o 0x0fffffe0` ===== +#56 57 # push rsi, rdi push r64 50+rd +#BE E0 FF FF 0F # rsi = addr mov r32, imm32 B8+rd id +#BA 00 0A 00 00 # rdx = u mov r32, imm32 B8+rd id +#99 54 # Call TYPE +#5F 5E # pop rdi, rsi pop r64 58+rd +#C3 # return ret C3 +# +## 99 E4 99 C2 # dbg BYE +# +#99 03 72 65 67 ########## reg ( -- ) show registers; use `./SForth | xxd` ======================== +#56 57 # push rsi, rdi push r64 50+rd +#41 57 57 41 56 56 # push r15, rdi, r14, rsi push r64 REX 50+rd +#41 55 55 41 54 54 # push r13, rbp, r12, rsp push r64 REX 50+rd +#41 53 53 41 52 52 # push r11, rbx, r10, rdx push r64 REX 50+rd +#41 51 51 41 50 50 # push r9 , rcx, r8 , rax push r64 REX 50+rd +#48 8B F4 # rsi = rsp mov r64, r/m64 REX.W 8B /r 11 110 100 +#BA 80 00 00 00 # rdx = u mov r32, imm32 B8+rd id +#99 54 # Call TYPE +#48 83 EC 80 # rsp -= -80 sub r/m64, imm8 REX.W 83 /5 ib 11 101 100 +#5F 5E # pop rdi, rsi pop r64 58+rd +#C3 # return ret C3 +# +## 99 F2 99 C2 # reg BYE +# +## ============= TEXT INTERPRETER +## +## Standard Forth handles input one line at a time. +## SmithForth's text interpreter is a simple interpreter in the standard Forth style. +## SVAL (see standard Forth's EVALUATE) interprets each line. +## REFILL fetches a line of input, including its trailing LF, and sets the input source state. +## 10000000 #IN cell contains #characters in the current line. +## 10000008 TIB cell contains the address where the current line begins. +## 10000010 >IN cell contains #characters in the current line that have been parsed. +## 10000020 STATE cell contains 0(Interpreting) or 1(Compiling). +## 10000028 Latest cell contains the execution token (xt) of the latest defined Forth word. +## In Forth, to parse is to remove from the input stream. As a line is parsed, [>IN] increases from 0 to [#IN]. +## Forth's "parse area" is the part of the line not yet parsed. 
+# +#99 06 52 45 46 49 4C 4C # REFILL ( -- ) ========================================================== +#49 C7 C1 00 00 00 10 # r9 = VAR mov r/m64, imm32 REX.W C7 /0 id 11 000 001 +#49 8B 01 # rax = [#IN] mov r64, r/m64 REX.W 8B /r 00 000 001 +#49 01 41 08 # [TIB] += rax add r/m64, r64 REX.W 01 /r 01 000 001 +#49 83 21 00 # [#IN] = 0 and r/m64, imm8 REX.W 83 /4 ib 00 100 001 +#49 83 61 10 00 # [>IN] = 0 and r/m64, imm8 REX.W 83 /4 ib 01 100 001 +## _beg: # 00 +#49 FF 01 # [#IN]++ inc r/m64 REX.W FF /0 00 000 001 +#49 8B 41 08 # rax = [TIB] mov r64, r/m64 REX.W 8B /r 01 000 001 +#49 03 01 # rax += [#IN] add r64, r/m64 REX.W 03 /r 00 000 001 +#80 78 FF 0A # cmp [rax-1], LF cmp r/m8, imm8 80 /7 ib 01 111 000 +#75 F0 #-jump _beg if != 10 jne rel8 75 cb +#C3 # return ret C3 +# +#99 04 73 65 65 6B ####### seek ( cl dl "ccc" -- eflags ) parse until 1st char of parse area is within [cl, dl) or parse area is empty +#49 C7 C1 00 00 00 10 # r9 = VAR mov r/m64, imm32 REX.W C7 /0 id 11 000 001 +#2A D1 # dl -= cl sub r8, r/m8 2A /r 11 010 001 +## _beg: # 00 like WITHIN ( al cl dl -- eflags ) +#49 8B 41 10 # rax = [>IN] mov r64, r/m64 REX.W 8B /r 01 000 001 +#49 3B 01 # cmp rax, [#IN] cmp r64, r/m64 REX.W 3B /r 00 000 001 +#73 16 #+jump _end if U>= 09 jae rel8 73 cb +#49 8B 41 08 # rax = [TIB] mov r64, r/m64 REX.W 8B /r 01 000 001 +#49 03 41 10 # rax += [>IN] add r64, r/m64 REX.W 03 /r 01 000 001 +#8A 00 # al = [rax] mov r8, r/m8 8A /r 00 000 000 +#2A C1 # al -= cl sub r8, r/m8 2A /r 11 000 001 +#3A C2 # cmp al, dl cmp r8, r/m8 3A /r 11 000 010 +#72 06 #+jump _end if U< 19 jb rel8 72 cb +#49 FF 41 10 # [>IN]++ inc r/m64 REX.W FF /0 01 000 001 +#EB E1 #-jump _beg 1F jmp rel8 EB cb +## _end: # 1F +#C3 # return ret C3 +# +#99 05 50 41 52 53 45 #### PARSE ( cl dl "ccc" -- rbp=addr rax=u ) addr: where ccc begins ; u: length of ccc +#49 C7 C1 00 00 00 10 # r9 = VAR mov r/m64, imm32 REX.W C7 /0 id 11 000 001 +#49 8B 69 10 # rbp = [>IN] mov r64, r/m64 REX.W 8B /r 01 101 001 +#99 73 # 
Call seek parse until 1st instance within [cl, dl) is parsed or parse area empty +#49 8B 41 10 # rax = [>IN] mov r64, r/m64 REX.W 8B /r 01 000 001 +#73 04 #+jump _end if U>= 00 jae rel8 73 cb +#49 FF 41 10 # [>IN]++ inc r/m64 REX.W FF /0 01 000 001 +## _end: # 04 +#48 29 E8 # rax -= rbp sub r/m64, r64 REX.W 29 /r 11 101 000 +#49 03 69 08 # rbp += [TIB] add r64, r/m64 REX.W 03 /r 01 101 001 +#C3 # return ret C3 +# +#99 05 70 6E 61 6D 65 #### pname ( "ccc" -- rbp=addr rax=u ) PARSE-NAME ============ +#B1 21 B2 7F # (cl, dl) = (BL+1, ...) mov r8, imm8 B0+rb ib +#99 73 # Call seek +#B1 7F B2 21 # (cl, dl) = (..., BL+1) mov r8, imm8 B0+rb ib +#99 50 # Call PARSE +#C3 # return ret C3 +# +#99 81 5B ################ [ ( -- ) lbracket IMMEDIATE ============================================ +#6A 00 # push 0(Interpreting) push imm8 6A ib +#8F 04 25 20 00 00 10 # pop [STATE] pop r/m64 8F /0 00 000 100 00 100 101 +#C3 # return ret C3 +# +#99 01 5D ################ ] ( -- ) rbracket ====================================================== +#6A 01 # push 1(Compiling) push imm8 6A ib +#8F 04 25 20 00 00 10 # pop [STATE] pop r/m64 8F /0 00 000 100 00 100 101 +#C3 # return ret C3 +# +#99 81 5C ################ \ ( "ccc" -- ) backslash IMMEDIATE ================================ +#48 8B 04 25 00 00 00 10 # rax = [#IN] mov r64, r/m64 REX.W 8B /r 00 000 100 00 100 101 +#48 89 04 25 10 00 00 10 # [>IN] = rax mov r/m64, r64 REX.W 89 /r 00 000 100 00 100 101 +#C3 # return ret C3 +# +#99 81 28 ################ ( ( "ccc" -- ) lparen IMMEDIATE ================================ +#B1 29 B2 2A # (cl, dl) = (RP, RP+1) mov r8, imm8 B0+rb ib +#99 50 # Call PARSE Forth 2012 implies comment ends at rparen or newline. 
+#C3 # return ret C3 +# +#99 01 3A ################ : ( "ccc" -- ) colon ==================================== +#99 70 # Call pname See Forth 2012 Table 2.1 +#48 89 EE # rsi = rbp mov r/m64, r64 REX.W 89 /r 11 101 110 +#BA 28 00 00 10 # rdx = Latest mov r32, imm32 B8+rd id +#99 48 # Call Head +#48 8B 0A # rcx = [rdx] mov r64, r/m64 REX.W 8B /r 00 001 010 +#48 83 C1 10 # rcx += 10 add r/m64, imm8 REX.W 83 /0 ib 11 000 001 +#80 09 40 # [rcx] |= 40(HIDDEN) or r/m8, imm8 80 /1 ib 00 001 001 +#99 5D # Call ] +#C3 # return ret C3 +# +#99 81 3B ################ ; ( C: -- ) semicolon IMMEDIATE ======================================== +#B0 C3 # al = opcode ret mov r8, imm8 B0+rb ib +#AA # [rdi++] = al stos m8 AA +#48 8B 0C 25 28 00 00 10 # rcx = [Latest] mov r64, r/m64 REX.W 8B /r 00 001 100 00 100 101 +#48 83 C1 10 # rcx += 10 add r/m64, imm8 REX.W 83 /0 ib 11 000 001 +#80 21 BF # [rcx] &= BF(~HIDDEN) and r/m8, imm8 80 /4 ib 00 100 001 +#99 5B # Call [ +#C3 # return ret C3 +# +#99 01 2E ################ . ( char -- ) nonstandard name for C, ================================== +#41 8A 07 # al = [r15] mov r8, r/m8 REX 8A /r 00 000 111 +#49 83 C7 08 # r15 += 8 add r/m64, imm8 REX.W 83 /0 ib 11 000 111 +#AA # [rdi++] = al stos m8 AA +#C3 # return ret C3 +# +#99 83 4C 49 54 ########## LIT ( C: x -- ) ( -- x ) IMMEDIATE ===================================== TODO compare xchg r15, rsp ; push imm8 ; xchg r15, rsp +#B8 49 83 EF 08 AB # compile r15 -= 8 sub r/m64, imm8 REX.W 83 /5 ib 11 101 111 +#B8 6A 41 8F 07 AA # eax = push x ; pop [r15] push i8 ; pop r/m64 6A ib;REX 8F /0 00 000 111 +#41 8A 07 AB # al = [r15] ; compile mov r8, r/m8 REX 8A /r 00 000 111 +#49 83 C7 08 # r15 += 8 add r/m64, imm8 REX.W 83 /0 ib 11 000 111 +#C3 # return ret C3 +# +#99 03 78 74 3D ########## xt= ( rbp=addr rax=u rbx=xt -- rbx=xt rax=? rdi=? eflags ) rbx == 0 or unhidden and matches +#48 85 DB # rbx(xt) ? 
test r/m64, r64 REX.W 85 /r 11 011 011 +#75 01 #+jump _nonzero if != 0 jnz rel8 75 cb +#C3 # return ret C3 +## _nonzero: # +#48 8B C8 # rcx = rax(u) mov r64, r/m64 REX.W 8B /r 11 001 000 +#48 8D 73 10 # rsi = rbx(xt) + 10 lea r64, m REX.W 8D /r 01 110 011 +#AC # al = [rsi++] lods m8 AC +#A8 40 # al & 40(HIDDEN) ? test al, imm8 A8 ib +#74 01 #+jump _unhidden if == 0 jz rel8 74 cb +#C3 # return ret C3 +## _unhidden: # +#48 83 E0 1F # rax &= 1F(Length) and r/m64, imm8 REX.W 83 /4 ib 11 100 000 +#48 39 C8 # cmp rax, rcx cmp r/m64, r64 REX.W 39 /r 11 001 000 +#74 01 #+jump _lengthEq if == je rel8 74 cb +#C3 # return ret C3 +## _lengthEq: # +#48 8B FD # rdi = rbp mov r64, r/m64 REX.W 8B /r 11 111 101 +#F3 A6 # strings equal ? repe cmps m8, m8 F3 A6 +#C3 # return ret C3 +# +#99 04 46 49 4E 44 ####### FIND ( rbp=addr rax=u -- rbp=addr rax=u rbx=xt ) xt==0 if not found ==== +#48 8B 1C 25 28 00 00 10 # rbx = [Latest] mov r64, r/m64 REX.W 8B /r 00 011 100 00 100 101 +## _beg: # +#E8 03 00 00 00 #+call (FIND) call rel32 E8 cd +#75 F9 #-jump _beg if != jne rel8 75 cb +#C3 # return ret C3 +## # # # # # # # # # # # # (FIND) +#50 57 # push rax, rdi push r64 50+rd +#99 78 # Call xt= +#5F 58 # pop rdi, rax pop r64 58+rd +#74 04 #+jump _end if == je rel8 74 cb +#48 8B 5B 08 # rbx = [rbx+8] mov r64, r/m64 REX.W 8B /r 01 011 011 +## _end: # +#C3 # return ret C3 +# +#99 03 4E 75 6D ########## Num ( rbp=addr rax=u -- n ) ============================================ +#49 83 EF 08 # r15 -= 8 sub r/m64, imm8 REX.W 83 /5 ib 11 101 111 +#49 83 27 00 # [r15] = 0 and r/m64, imm8 REX.W 83 /4 ib 00 100 111 +#48 89 C1 # rcx = rax mov r/m64, r64 REX.W 89 /r 11 000 001 +#48 8B F5 # rsi = rbp mov r64, r/m64 REX.W 8B /r 11 110 101 +## _beg: # +#E8 03 00 00 00 #+call (Num) call rel32 E8 cd +#E2 F9 #-jump beg if --rcx loop rel8 E2 cb +#C3 # return ret C3 +## # # # # # # # # # # # # (Num) +#AC # al = [rsi++] lods m8 AC +#3C 41 # cmp al, 'A' cmp al, imm8 3C ib +#7C 02 #+jump _digit if < jl rel8 7C cb +## 
_letter: # +#2C 07 # al -= 7 sub al, imm8 2C ib +## _digit: # +#2C 30 # al -= 30 sub al, imm8 2C ib +#49 C1 27 04 # [r15] <<= 4 sal r/m64, imm8 REX.W C1 /4 ib 00 100 111 +#49 09 07 # [r15] |= rax or r/m64, r64 REX.W 09 /r 00 000 111 +#C3 # return ret C3 +# +#99 04 6D 69 73 73 ####### miss ( rbp=addr rax=u rbx=xt -- |n rbx=xt ) n present iff u nonzero ==== +#48 85 DB # rbx(xt) ? test r/m64, r64 REX.W 85 /r 11 011 011 +#74 01 #+jump (miss) if == 0 jz rel8 74 cb +#C3 # return ret C3 +## # # # # # # # # # # # # (miss) +#48 85 C0 # rax(u) ? test r/m64, r64 REX.W 85 /r 11 000 000 +#75 01 #+jump _nonempty if != jne rel8 75 cb +#C3 # return ret C3 +## _nonempty: # +#99 4E # Call Num +#F6 04 25 20 00 00 10 01 # [STATE] ? test r/m8, imm8 F6 /0 ib 00 000 100 00 100 101 +#75 01 #+jump _lit if != 0 jnz rel8 75 cb +#C3 # return ret C3 +## _lit: # +#99 4C # Call LIT +#C3 # return ret C3 +# +#99 04 45 58 45 43 ####### EXEC ( rbx=xt -- ) ===================================================== +#B9 F8 FF FF 7F # rcx = _ mov r32, imm32 B8+rd id +#57 # push rdi push r64 50+rd +#89 CF # rdi = rcx mov r/m32, r32 89 /r 11 001 111 +#99 43 # Call COMPL +#B0 C3 # al = C3 mov r8, imm8 B0+rb ib +#AA # [rdi++] = al stos m8 AA +#5F # pop rdi pop r64 58+rd +#FF D1 # call rcx call r/m64 FF /2 11 010 001 +#C3 # return ret C3 +# +#99 04 65 78 65 63 ####### exec ( al rbx=xt -- ) iff al != 1 ====================================== +#3C 01 # cmp al, 1 cmp al, imm8 3C ib +#75 01 #+jump (exec) if != jne rel8 75 cb +#C3 # return ret C3 +## # # # # # # # # # # # # (exec) +#99 45 # Call EXEC +#C3 # return ret C3 +# +#99 05 63 6F 6D 70 6C #### compl ( al -- al ) iff al == 1 ========================================== +#3C 01 # cmp al, 1 cmp al, imm8 3C ib +#74 01 #+jump (compl) if == je rel8 74 cb +#C3 # return ret C3 +## # # # # # # # # # # # # (compl) +#99 43 # Call COMPL +#B0 01 # al = 1 mov r8, imm8 B0+rb ib +#C3 # return ret C3 +# +#99 03 68 69 74 ########## hit ( rbx=xt -- ) 
====================================================== +#48 85 DB # rbx(xt) ? test r/m64, r64 REX.W 85 /r 11 011 011 +#75 01 #+jump (hit) if != 0 jnz rel8 75 cb +#C3 # return ret C3 +## # # # # # # # # # # # # (hit) +#40 8A 43 10 # al = [rbx+10] mov r8, r/m8 REX 8A /r 01 000 011 +#24 80 # al &= 80(IMMEDIATE) and al, imm8 24 ib +#0A 04 25 20 00 00 10 # al |= [STATE] or r8, r/m8 0A /r 00 000 100 00 100 101 +#99 63 # Call compl +#99 65 # Call exec +#C3 # return ret C3 +# +#99 04 53 56 41 4C ####### SVAL ( i*x -- j*x ) == 00 EVALUATE ===================================== +#E8 03 00 00 00 #+call (SVAL) 05 call rel32 E8 cd +#7C F9 #-jump SVAL if < 07 jl rel8 7C cb +#C3 # return ret C3 +## # # # # # # # # # # # # (SVAL) 08 +#99 70 # Call pname +#99 46 # Call FIND +#99 6D # Call miss +#99 68 # Call hit +#48 8B 04 25 10 00 00 10 # rax = [>IN] mov r64, r/m64 REX.W 8B /r 00 000 100 00 100 101 +#48 3B 04 25 00 00 00 10 # cmp rax, [#IN] cmp r64, r/m64 REX.W 3B /r 00 000 100 00 100 101 +#C3 # return ret C3 +# +#99 02 74 69 ############# ti ( -- ) text interpreter ============================================= +#49 C7 C7 00 00 00 10 # r15(stack) = 10000000 mov r/m64, imm32 REX.W C7 /0 id 11 000 111 +#49 89 77 08 # [TIB] = rsi mov r/m64, r64 REX.W 89 /r 01 110 111 +#99 5B # Call [ +## _beg: # +#E8 02 00 00 00 #+call (ti) call rel32 E8 cd +#EB F9 #-jump _beg jmp rel8 EB cb +## # # # # # # # # # # # # (ti) +#99 52 # Call REFILL +#99 53 # Call SVAL +#C3 # return ret C3 +# +## 99 E4 99 C2 # dbg BYE +# +#99 F4 # ti diff --git a/asm.rb b/asm.rb new file mode 100755 index 0000000..eb0362c --- /dev/null +++ b/asm.rb @@ -0,0 +1,174 @@ +#!/bin/env ruby + +PROG = [ + 0x7F, 0x45, 0x4C, 0x46, 0x02, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x3E, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x78, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, # e_entry: 0x00400078, (virtual memory address of entry point) + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00, + 0x01, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, # p_vaddr: 0x00400000, (virtual address of segment in memory) + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, # p_filesz: 0x00011E02, (size in bytes of the segment in the file) + 0x00, 0x00, 0xC0, 0x7F, 0x00, 0x00, 0x00, 0x00, # p_memsz: 0x7FC00000, (size in bytes of the segment in memory) + 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + +# 0x48, 0xc7, 0xc0, 0x3C, 0x00, 0x00, 0x00, # mov $0x3c,%rax +# 0x48, 0x31, 0xff, # xor %rdi,%rdi +# 0x0f, 0x05, # syscall +] + +Word = Struct.new(:name, :flags, :code) +IMM = 1 + +def push(arg); $stack.push(arg); end +def pop(); $stack.pop(); end +def do_word(code); code.each{|w| w.call() }; end +def word(name, flags, code) + code = code.map{|w| $words[w].code } + $words[name] = Word.new(name, flags, lambda { do_word(code) }) +end + +def builtin(name, &code) + $words[name] = Word.new(name, 0, code) +end + + +$stack = [] +$state = 0 +$curr = nil +$buffer = [] +$words = { + 'load-imm' => lambda { + puts "load-imm" + PROG.push(0x48) + PROG.push(0xc7) + PROG.push(0xC0 + $stack.pop) + imm = $stack.pop + PROG.push((imm >> 0) & 0xff) + PROG.push((imm >> 8) & 0xff) + PROG.push((imm >> 16) & 0xff) + PROG.push((imm >> 24) & 0xff) + }, +} + +builtin('.') { + if $stack.length <= 5 + pp $stack + else + pp (["..."] + $stack[-5..-1]) + end +} +builtin('+') { push(pop() + pop()) } +builtin('-') { push(pop() - pop()) } +builtin('*') { push(pop() * pop()) } +builtin('/') { push(pop() / pop()) } +builtin('%') { push(pop() % pop()) } + +builtin('word') do + if $buffer.length == 0 then + $buffer = gets.scan(/[^\s]+/).reverse + end + push $buffer.pop +end + +builtin('create') do + $curr = Word.new(pop(), 0, []) 
+end + +builtin('compile') do + $state = 1 +end + +builtin('interpret') do + $state = 0 +end + +builtin('register') do + code = $curr.code + $curr.code = lambda { do_word(code) } + $words[$curr.name] = $curr + pp $words +end + +word(':', IMM, %w[ + word + create + compile +]) + +word(';', IMM, %w[ + interpret + register +]) + + + + + + +builtin('rax') { push 0 } +builtin('rcx') { push 1 } +builtin('rdx') { push 2 } +builtin('rbx') { push 3 } +builtin('rsi') { push 4 } +builtin('rdi') { push 5 } +builtin('rsp') { push 6 } +builtin('rbp') { push 7 } +builtin('r8') { push 8 } +builtin('r9') { push 9 } +builtin('r10') { push 10 } +builtin('r11') { push 11 } +builtin('r12') { push 12 } +builtin('r13') { push 13 } +builtin('r14') { push 14 } +builtin('r15') { push 15 } + + +loop do + $words['word'].code.call() + w = $stack.pop + if $words[w] then + if $state == 0 || (($words[w].flags & IMM) == IMM) + $words[w].code.call + else + $curr.code << $words[w].code + end + elsif w =~ /^[0-9]+$/ + if $state == 0 + push w.to_i + else + $curr.code << lambda{ push w.to_i } + end + else + puts "?" 
+ $stack = [] + end +end + + +#$stdin.each_line do |line| +# line.split.each do |w| +# end +#end +# +#File.read("asm.s").split.each do |w| +# if $words[w] +# $words[w].call() +# else +# $stack.push(w.to_i($base)) +# end +#end +# +## Set program length and write to disk +#PROG[96] = ((PROG.length >> 0) & 0xFF) +#PROG[97] = ((PROG.length >> 8) & 0xFF) +#PROG[98] = ((PROG.length >> 16) & 0xFF) +#PROG[99] = ((PROG.length >> 24) & 0xFF) +#File.binwrite("a.out", PROG.map{|b| b.chr }.join) + + diff --git a/asm.s b/asm.s new file mode 100644 index 0000000..e19d06a --- /dev/null +++ b/asm.s @@ -0,0 +1,3 @@ +60 rax load-imm +#rdi rdi xor +#syscall diff --git a/dis.sh b/dis.sh index a65e908..39a4504 100755 --- a/dis.sh +++ b/dis.sh @@ -1,2 +1,2 @@ #!/bin/sh -cc -g -c -o test.o test.c -O1 -fno-omit-frame-pointer && objdump -D test.o | less +cc -g -c -o test.o test.c -O0 -fomit-frame-pointer && objdump -d test.o diff --git a/test.c b/test.c index 6ca36b6..022f828 100644 --- a/test.c +++ b/test.c @@ -1,7 +1,35 @@ -int bar = 1; -int foo = 1; - -int add1(int a) +void test(void) { - return a + foo; +// asm("mov $60, %eax"); +// asm("xor %rdi, %rdi"); +// asm("syscall"); + + + + +// asm("mov $0x01, %rax"); +// asm("push %rax"); +// asm("mov $0x0102, %rbx"); +// asm("push %rbx"); +// asm("mov $0x01020304, %rcx"); +// asm("push %rcx"); +// asm("mov $0x0123456789, %rdx"); +// asm("push %rdx"); + + asm("push %rax"); + asm("push %rcx"); + asm("push %rdx"); + asm("push %rbx"); + asm("push %rsi"); + asm("push %rdi"); + asm("push %rsp"); + asm("push %rbp"); + asm("push %r8"); + asm("push %r9"); + asm("push %r10"); + asm("push %r11"); + asm("push %r12"); + asm("push %r13"); + asm("push %r14"); + asm("push %r15"); } -- 2.49.0