# David Smith 2022 david.a.c.v.smith@gmail.com http://dacvs.neocities.org/
# This is the 1st of the 2 files that define SmithForth, a subroutine-threaded Forth for x86-64.
-# Understanding this file may be easier if first you watch my video series on hand-made
-# Linux x86 executables (featuring the ELF header and general-purpose x86 instructions):
-# https://www.youtube.com/playlist?list=PLZCIHSjpQ12woLj0sjsnqDH8yVuXwTy3p
+# ELF FILE HEADER
-# ============= ELF FILE HEADER
-#
-# Linux will run a computing job given the name of an executable file. An executable file
-# contains machine code for the processor and information for the operating system about the
-# layout of the file and the dimensions of the job. Working without the usual development
-# tools, we write this information by hand.
-
-7F 45 4C 46 # e_ident[EI_MAG]: ELF magic number
- 02 # e_ident[EI_CLASS]: 1: 32-bit, 2: 64-bit
- 01 # e_ident[EI_DATA]: 1: little-endian, 2: big-endian
- 01 # e_ident[EI_VERSION]: ELF header version; must be 1
- 00 # e_ident[EI_OSABI]: Target OS ABI; should be 0
-00 # e_ident[EI_ABIVERSION]: ABI version; 0 is ok for Linux
- 00 00 00 00 00 00 00 # e_ident[EI_PAD]: unused, should be 0
-02 00 # e_type: object file type; 2: executable
- 3E 00 # e_machine: instruction set architecture; 3: x86, 3E: amd64
- 01 00 00 00 # e_version: ELF identification version; must be 1
-78 00 40 00 00 00 00 00 # e_entry: memory address of entry point (where process starts)
-40 00 00 00 00 00 00 00 # e_phoff: file offset where program headers begin (34: 32-bit, 40: 64)
-00 00 00 00 00 00 00 00 # e_shoff: file offset where section headers begin
-00 00 00 00 # e_flags: 0 for x86
- 40 00 # e_ehsize: size of this header (34: 32-bit, 40: 64-bit)
- 38 00 # e_phentsize: size of each program header (20: 32-bit, 38: 64-bit)
-01 00 # e_phnum: number of program headers
- 40 00 # e_shentsize: size of each section header (28: 32-bit, 40: 64-bit)
- 00 00 # e_shnum: number of section headers
- 00 00 # e_shstrndx: index of section header containing section names
+7F 45 4C 46 02 01 01 00
+00 00 00 00 00 00 00 00
+02 00 3E 00 01 00 00 00
+78 00 40 00 00 00 00 00 # e_entry: 0x00400078 (virtual memory address of entry point)
+40 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 40 00 38 00
+01 00 40 00 00 00 00 00
# ============= ELF PROGRAM HEADER
-01 00 00 00 # p_type: segment type; 1: loadable
- 07 00 00 00 # p_flags: segment-dependent flags (1: X, 2: W, 4: R)
-00 00 00 00 00 00 00 00 # p_offset: file offset where segment begins
-00 00 40 00 00 00 00 00 # p_vaddr: virtual address of segment in memory (amd64: 00400000)
-00 00 00 00 00 00 00 00 # p_paddr: physical address of segment, unspecified by 386 supplement
-02 1E 01 00 00 00 00 00 ##### p_filesz: size in bytes of the segment in the file image (see make.sh)
-00 00 C0 7F 00 00 00 00 # p_memsz: (>= filesz) size in bytes of the segment in memory
-00 10 00 00 00 00 00 00 # p_align: 1000 for x86
+01 00 00 00 07 00 00 00
+00 00 00 00 00 00 00 00
+00 00 40 00 00 00 00 00 # p_vaddr: 0x00400000 (virtual address of segment in memory)
+00 00 00 00 00 00 00 00
+02 1E 01 00 00 00 00 00 # p_filesz: 0x00011E02 (size in bytes of the segment in the file image)
+00 00 C0 7F 00 00 00 00 # p_memsz: 0x7FC00000 ((>= filesz) size in bytes of the segment in memory)
+00 10 00 00 00 00 00 00
# ============= 64-BIT EXTENSIONS
#
# SmithForth has 64-bit Forth cells (i.e., integers) and uses the instruction set
# x86-64 (a.k.a. amd64). Changes from x86 to x86-64 are explained in Intel's
# manual (and in AMD's). See for instance Vol 1 Sec 3.2.1, Vol 2 Sec 2.2.1, and the
-# instruction set reference, Vol 2 Chs 3, 4, 5. There are many subtle details.
+# instruction set reference, Vol 2 Chs 3, 4, 5. There are many subtle details.
#
# In a nutshell, general-purpose registers are widened from 32 bits to 64. The old eight
# 32-bit general-purpose registers EAX, ECX, ..., EDI are still available as operands.
99 01 5D ################ ] ( -- ) rbracket ======================================================
6A 01 # push 1(Compiling) push imm8 6A ib
-8F 04 25 20 00 00 10 # pop [STATE] pop r/m64 8F /0 00 000 100 00 100 101
+8F 04 25 20 00 00 10 # pop [STATE] pop r/m64 8F /0 00 000 100 00 100 101
C3 # return ret C3
99 81 5C ################ \ ( "ccc<eol>" -- ) backslash IMMEDIATE ================================
48 39 C8 # cmp rax, rcx cmp r/m64, r64 REX.W 39 /r 11 001 000
74 01 #+jump _lengthEq if == je rel8 74 cb
C3 # return ret C3
-# _lengthEq: #
+# _lengthEq: #
48 8B FD # rdi = rbp mov r64, r/m64 REX.W 8B /r 11 111 101
F3 A6 # strings equal ? repe cmps m8, m8 F3 A6
C3 # return ret C3
C3 # return ret C3
# # # # # # # # # # # # # (FIND)
50 57 # push rax, rdi push r64 50+rd
-99 78 # Call xt=
+99 78 # Call xt=
5F 58 # pop rdi, rax pop r64 58+rd
74 04 #+jump _end if == je rel8 74 cb
48 8B 5B 08 # rbx = [rbx+8] mov r64, r/m64 REX.W 8B /r 01 011 011
99 03 4E 75 6D ########## Num ( rbp=addr rax=u -- n ) ============================================
49 83 EF 08 # r15 -= 8 sub r/m64, imm8 REX.W 83 /5 ib 11 101 111
49 83 27 00 # [r15] = 0 and r/m64, imm8 REX.W 83 /4 ib 00 100 111
-48 89 C1 # rcx = rax mov r/m64, r64 REX.W 89 /r 11 000 001
+48 89 C1 # rcx = rax mov r/m64, r64 REX.W 89 /r 11 000 001
48 8B F5 # rsi = rbp mov r64, r/m64 REX.W 8B /r 11 110 101
-# _beg: #
+# _beg: #
E8 03 00 00 00 #+call (Num) call rel32 E8 cd
E2 F9 #-jump beg if --rcx loop rel8 E2 cb
C3 # return ret C3
--- /dev/null
+//---------------------------------------------------------------------
+//
+//---------------------------------------------------------------------
+
+.set RETURN_STACK_SIZE, 8192
+.set BUFFER_SIZE, 4096
+
+.set F_IMMED, 0x80
+.set F_HIDDEN, 0x20
+.set F_LENMASK, 0x1f
+.set link, 0
+
+
+//---------------------------------------------------------------------
+// Register Usage
+//---------------------------------------------------------------------
+// x86_64 Parameter Registers:
+// rdi, rsi, rdx, rcx, r8, r9
+//
+// x86_64 Preserved Registers:
+// rbx, rsp, rbp, r12, r13, r14, r15
+
+.set PC, %rsi /* Program Counter */
+.set ASP, %rsp /* Argument Stack Pointer*/
+.set RSP, %rbp /* Return Stack Pointer */
+.set OA1, %rax /* Operator Argument Register 1 */
+.set OA2, %rbx /* Operator Argument Register 2 */
+
+
+//---------------------------------------------------------------------
+// Buffer Allocations
+//---------------------------------------------------------------------
+
+ .bss
+ .align 4096
+ .size return_stack, 8192
+return_stack:
+ .zero 8192
+return_stack_top: // Initial top of return stack.
+
+
+//---------------------------------------------------------------------
+// Assembler Macros
+//---------------------------------------------------------------------
+
+// Execute next instruction
+.macro NEXT
+ lodsq
+ jmpq *(OA1)
+.endm
+
+.macro PUSHRSP reg
+ leaq -8(%rbp),%rbp /* push reg on to return stack */
+ movq \reg,(%rbp)
+.endm
+
+.macro POPRSP reg
+ movq (%rbp),\reg /* pop top of return stack to reg */
+ leaq 8(%rbp),%rbp
+.endm
+
+.macro defcode name, namelen, flags=0, label
+ .data
+ .p2align 4
+ .global name_\label
+name_\label :
+ .quad link
+ .set link, name_\label
+ .byte \flags+\namelen
+ .ascii "\name"
+ .p2align 4
+ .global \label
+\label :
+ .quad code_\label
+ .text
+ .p2align 4, 0x90
+ .global code_\label
+code_\label :
+.endm
+
+.macro defword name, namelen, flags=0, label
+ .data
+ .p2align 4
+ .global name_\label
+name_\label :
+ .quad link
+ .set link,name_\label
+ .byte \flags+\namelen
+ .ascii "\name"
+ .p2align 4
+ .globl \label
+\label :
+ .quad DOCOL
+.endm
+
+.macro defconst name, namelen, flags=0, label, value
+defcode \name,\namelen,\flags,\label
+ pushq $\value
+ NEXT
+.endm
+
+//---------------------------------------------------------------------
+// Inner Interpreter Word
+//---------------------------------------------------------------------
+
+// Main Interpreter Definition
+ .text
+ .globl DOCOL
+ .p2align 4, 0x90
+DOCOL:
+ PUSHRSP PC
+ addq OA1, 8
+ mov PC, OA1
+ NEXT
+
+
+//---------------------------------------------------------------------
+// Core Interpreter Words
+//---------------------------------------------------------------------
+
+defword "quit",4,,quit
+ .quad rz, rsp_store /* R0 RSP!, clear the return stack */
+ .quad interpret /* interpret the next word */
+ .quad branch,-16 /* and loop (indefinitely) */
+
+defcode "interpret",9,,interpret
+ NEXT
+
+defword ":",1,,COLON
+ .quad word
+ .quad create
+ .quad lit, DOCOL, comma
+ .quad latest, fetch, hidden
+ .quad rbrack
+ .quad return
+
+defword ";",1,F_IMMED,SEMICOLON
+ .quad lit, return, comma
+ .quad latest, fetch, hidden
+ .quad lbrack
+ .quad return
+
+defcode "return",6,,return
+ POPRSP PC
+ NEXT
+
+defcode "word",4,,word
+ NEXT
+
+defcode "create",6,,create
+ NEXT
+
+defcode "lit",3,,lit
+ lodsq
+ pushq OA1
+ NEXT
+
+defcode ",",1,,comma
+ NEXT
+
+defcode "latest",6,,latest
+ NEXT
+
+defcode "@",1,,fetch
+ NEXT
+
+defcode "hidden",6,,hidden
+ NEXT
+
+defcode "[",1,,lbrack
+ NEXT
+
+defcode "]",1,,rbrack
+ NEXT
+
+
+//---------------------------------------------------------------------
+// Branching Words
+//---------------------------------------------------------------------
+
+defcode "branch",6,,branch
+ addq (%rsi), %rsi /* add the offset to the instruction pointer */
+ NEXT
+
+defcode "0branch",7,,zbranch
+ popq %rax
+ test %rax,%rax /* top of stack is zero? */
+ jz code_branch /* if so, jump back to the branch function above */
+ lodsq /* otherwise we need to skip the offset */
+ NEXT
+
+
+//---------------------------------------------------------------------
+// Return Stack Words
+//---------------------------------------------------------------------
+
+defconst "R0",2,,rz,return_stack_top
+
+defcode ">R",2,,to_r
+ popq %rax /* pop parameter stack into %eax */
+ PUSHRSP %rax /* push it on to the return stack */
+ NEXT
+
+defcode "R>",2,,from_r
+ POPRSP %rax /* pop return stack on to %eax */
+ pushq %rax /* and push on to parameter stack */
+ NEXT
+
+defcode "RSP@",4,,rsp_fetch
+ pushq %rbp
+ NEXT
+
+defcode "RSP!",4,,rsp_store
+ popq %rbp
+ NEXT
+
+defcode "RDROP",5,,rdrop
+ addq $8, %rbp
+ NEXT
+
+//---------------------------------------------------------------------
+// Builtin Primitives
+//---------------------------------------------------------------------
+// Legend:
+// i Integer value
+// f Fixed point value
+// F Floating point value
+// s String value
+// v Any value
+
+// (i - ) Exit the application with status code
+defcode "halt",4,,halt
+ popq %rdi
+ call _exit
+
+// (v - )Duplicates the top value on the stack
+defcode "drop",4,,drop
+ popq OA1
+ NEXT
+
+// (v1 v2 - v2 v1) Swap the top two values on the stack
+defcode "swap",4,,swap
+ popq OA1
+ popq OA2
+ pushq OA2
+ pushq OA1
+ NEXT
+
+// (v - v v) Duplicate the top value on the stack
+defcode "dup",3,,dup
+ movq (ASP), OA1
+ pushq OA1
+ NEXT
+
+// (v1 v2 - v1 v2 v1) Duplicate second item on the stack
+defcode "over",4,,over
+ movq 8(ASP), OA1
+ pushq OA1
+ NEXT
+
+// defcode rot
+// defcode -rot
+// defcode 2drop
+// defcode 2dup
+// defcode 2swap
+// defcode ?dup
+
+// defcode 1+
+// defcode 1-
+// defcode 4+
+// defcode 4-
+// defcode 8+
+// defcode 8-
+
+// defcode +
+// defcode -
+// defcode *
+// defcode /
+// defcode %
+
+// defcode =
+// defcode !=
+// defcode <
+// defcode >
+// defcode <=
+// defcode >=
+
+// defcode 0=
+// defcode 0!=
+// defcode 0<
+// defcode 0>
+// defcode 0<=
+// defcode 0>=
+
+// defcode and
+// defcode or
+// defcode xor
+// defcode not
+
+
+// defcode ret
+
+
+// defcode !
+// defcode @
+// defcode +!
+// defcode -!
+// defcode b!
+// defcode b@
+// defcode b@b!
+// defcode bmove
+
+
+// defcode >r
+// defcode r>
+// defcode rsp@
+// defcode rsp!
+// defcode rdrop
+
+// defcode asp@
+// defcode asp!
+
+// defcode getc
+// defcode putc
+// defcode word
+// defcode number
+// defcode find
+// defcode >cfa
+// defcode >dfa
+// defcode create
+// defcode ,
+// defcode [
+// defcode ]
+// defcode :
+// defcode ;
+// defcode immediate
+// defcode hidden
+// defcode hide
+// defcode '
+
+// defcode branch
+// defcode 0branch
+// defcode litstring
+// defcode tell
+// defcode quit
+// defcode interpret
+
+// defcode char
+// defcode execute
+// defcode syscall // or EFI call
+
+// Floating point operations
+// defcode +.
+// defcode -.
+// defcode *.
+// defcode /.
+
+//---------------------------------------------------------------------
+// Main Routine
+//---------------------------------------------------------------------
+ .text
+ .global _main
+ .global main
+ .p2align 4, 0x90
+_main:
+main:
+ cld
+ mov $return_stack_top, RSP
+ mov $cold_start, PC
+ NEXT
+
+// pushq OA1
+// pushq %rbp
+// leaq return_stack_top(%rip), RSP
+// leaq cold_start(%rip), PC
+// NEXT
+
+// First instructions to execute by the interpreter
+ .data
+ .global cold_start
+ .p2align 4
+cold_start:
+ .quad quit