Compare commits
No commits in common. "stable" and "canary" have entirely different histories.

.gitignore
@@ -1,4 +1,4 @@
+disco
 out
 *.o
 node_modules

@@ -1,3 +1,2 @@
 link log
-const a = "a"
-log("hello world")
+log("Hello World")

BIN disco_test
Binary file not shown.

disco_test.asm
@@ -1,19 +1,15 @@
-bits 64
-default rel
 section .data
-    GSDGYLUR db 97,0
-    STVGNPWI db 104,101,108,108,111,32,119,111,114,108,100,0
+    EFDNYLFZ db 72,101,108,108,111,32,87,111,114,108,100,0
 section .text
-    global _main
-_main:
+    global _start
+_start:
     push rbp
     mov rbp, rsp
-    push qword [rel GSDGYLUR]
-    mov rdi, STVGNPWI
+    mov rdi, EFDNYLFZ
     call _log
     mov rsp, rbp
     pop rbp
-    mov rax, 0x02000001
+    mov rax, 60
     mov rdi, 0
     syscall
 _log:
@@ -28,12 +24,12 @@ _log_loop:
     jmp _log_loop
 _log_loop_end:
     mov rdx, rbx
-    mov rax, 0x02000004
+    mov rax, 1
     mov rdi, 1
     pop rsi
     syscall
     push 10
-    mov rax, 0x02000004
+    mov rax, 1
     mov rdi, 1
     mov rsi, rsp
     mov rdx, 1
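
For context on the `mov rax` changes above (this note is not part of the diff): the stable output targets macOS, whose BSD-class syscall numbers carry a 0x2000000 offset, so exit is 0x02000001 and write is 0x02000004; the canary output targets Linux x86-64, where write is 1 and exit is 60. A minimal sketch of how a compiler might select between them, with the helper name assumed:

```ts
// Hypothetical helper, not from the repo: per-target syscall numbers.
// macOS (XNU) = BSD syscall-class offset 0x2000000 + BSD number;
// Linux x86-64 = plain numbers from its syscall table.
const syscalls = process.platform === 'darwin'
  ? { write: 0x02000004, exit: 0x02000001 }
  : { write: 1, exit: 60 };
```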

compiler.ts
@@ -23,7 +23,7 @@ const localVariables = new Map();
 const sections = {
   preamble() {
     if(process.platform === 'darwin') {
-      return 'bits 64\ndefault rel\n';
+      return ' global _main\n';
     } else {
       return '';
     }
@@ -37,21 +37,10 @@ const sections = {
   },
   text() {
     if(process.platform === 'darwin') {
-      return (
-        'section .text\n' +
-        ' global _main\n' +
-        '_main:\n' +
-        ' push rbp\n' +
-        ' mov rbp, rsp\n' +
-        statements.map(v => ` ${v}\n`).join('') +
-        ' mov rsp, rbp\n' +
-        ' pop rbp\n' +
-        ' mov rax, 0x02000001\n' +
-        ' mov rdi, 0\n' +
-        ' syscall\n' + [...linkedLibraries.values()]
-          .map(({asmName, asm}) => asmName + ':\n' + asm)
-          .join('\n')
-      );
+      return 'section .text\n_main:\n push rbp\n mov rbp, rsp\n '
+        + statements.join('\n ')
+        + '\n mov rsp, rbp\n pop rbp\n mov rax, 0x02000001\n mov rdi, 0\n syscall\n'
+        + [...linkedLibraries.values()].map(({asmName, asm}) => asmName + ':\n' + asm).join('\n')
     } else {
       return 'section .text\n global _start\n_start:\n push rbp\n mov rbp, rsp\n '
         + statements.join('\n ')
@@ -117,10 +106,7 @@ function compileVariable(name, value) {
   });
   if(value.type === 'string') {
     const variableName = compileStringLiteral(value.value);
-    if(process.platform === 'darwin')
-      statements.push(`push qword [rel ${variableName}]`);
-    else
-      statements.push('push ' + variableName);
+    statements.push('push ' + variableName)
   } else {
     console.error('dont know how to set a variable to a non string lol')
   }

index.ts
@@ -3,8 +3,8 @@
 import { readFileSync } from "fs";
 import { compile } from "./compiler";
 import grammar from "./grammar";
+import { tokenize } from "./tokenizer";
 import colorize from "./util/asm/colorize";
-import tokenize from "./util/disco/tokenizer";
 import { printTokens } from "./util/utils";

 console.log();
@@ -32,8 +32,8 @@ const asmFile = compile(ast)
 try {
   console.log();
   console.log('=== ASM ===');
-  require('fs').writeFileSync('disco_test.asm', asmFile);
   console.log(colorize(asmFile));
+  require('fs').writeFileSync('disco_test.asm', asmFile);

   console.log();
   console.log('=== nasm ===');
@@ -59,7 +59,6 @@ function ld() {
   require('child_process').execSync([
     'ld', 'disco_test.o',
     '-o', 'disco_test',
-    '-no_pie',
     '-macosx_version_min', '11.0',
     '-L', '/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib',
     '-lSystem'

earley.ts
@@ -37,7 +37,6 @@ export class Terminal extends Token { static terminal: true = true };
 // these tokens are special, for formatting and generalization reasons.
 export class $Newline extends Terminal { }
 export class $Whitespace extends Terminal { }
-export class $EOF extends Terminal { }

 function isTerminal(tokenClass: TokenClass): tokenClass is TerminalTokenClass {
   return tokenClass.terminal;

grammar.ts
@@ -1,32 +1,50 @@
-import { $Newline, Grammar, NonTerminal, Production, Terminal, Token } from "./earley";
+import { Grammar, NonTerminal, Production, Terminal, Token } from "./earley";
 import { AST } from './ast';
-import * as t from './util/disco/tokens';
+
+export class $KeywordLink extends Terminal { }
+export class $KeywordEquals extends Terminal { }
+export class $KeywordLParen extends Terminal { }
+export class $KeywordRParen extends Terminal { }
+export class $KeywordConst extends Terminal { }
+
+export class $String extends Terminal {}
+export class $Identifier extends Terminal {}
+
+export class $Newline extends Terminal { }
+
+export class $Program extends NonTerminal { }
+export class $Statement extends NonTerminal { }
+export class $LinkStatement extends NonTerminal { }
+export class $VariableDeclaration extends NonTerminal { }
+export class $Expression extends NonTerminal { }
+export class $InvocationExpression extends NonTerminal { }
+export class $VariableReference extends NonTerminal { }

 const ps: Production[] = [
-  { left: t.$Program, right: [t.$Statement], resolver: (s) => !!s ? AST.Body([s]) : AST.Body([]) },
-  { left: t.$Program, right: [t.$Statement, t.$Program], resolver: (s, ss) => !!s ? AST.Body([s, ...ss.value]) : ss},
+  { left: $Program, right: [$Statement], resolver: (s) => !!s ? AST.Body([s]) : AST.Body([]) },
+  { left: $Program, right: [$Statement, $Program], resolver: (s, ss) => !!s ? AST.Body([s, ...ss.value]) : ss},

-  { left: t.$Statement, right: [$Newline], resolver: () => false },
-  { left: t.$Statement, right: [t.$LinkStatement], resolver: a => a },
-  { left: t.$Statement, right: [t.$VariableDeclaration], resolver: a => a },
-  { left: t.$Statement, right: [t.$Expression], resolver: a => a },
+  { left: $Statement, right: [$Newline], resolver: () => false },
+  { left: $Statement, right: [$LinkStatement], resolver: a => a },
+  { left: $Statement, right: [$VariableDeclaration], resolver: a => a },
+  { left: $Statement, right: [$Expression], resolver: a => a },

-  { left: t.$Expression, right: [t.$String], resolver: (s: t.$String) => AST.String(s.value) },
-  { left: t.$Expression, right: [t.$InvocationExpression], resolver: a => a },
-  { left: t.$Expression, right: [t.$VariableReference], resolver: a => a },
+  { left: $Expression, right: [$String], resolver: (s: $String) => AST.String(s.value) },
+  { left: $Expression, right: [$InvocationExpression], resolver: a => a },
+  { left: $Expression, right: [$VariableReference], resolver: a => a },

-  { left: t.$VariableReference, right: [t.$Identifier], resolver: (identifier: t.$Identifier) => AST.VariableReference(identifier.value) },
+  { left: $VariableReference, right: [$Identifier], resolver: (identifier: $Identifier) => AST.VariableReference(identifier.value) },

-  { left: t.$InvocationExpression, right: [t.$Identifier, t.$KeywordLParen, t.$Expression, t.$KeywordRParen],
-    resolver: (identifier: t.$Identifier, _, arg: any, __) => AST.Invocation(identifier.value, arg) },
+  { left: $InvocationExpression, right: [$Identifier, $KeywordLParen, $Expression, $KeywordRParen],
+    resolver: (identifier: $Identifier, _, arg: any, __) => AST.Invocation(identifier.value, arg) },

-  { left: t.$VariableDeclaration, right: [t.$KeywordConst, t.$Identifier, t.$KeywordEquals, t.$Expression],
-    resolver: (_, identifier: t.$Identifier, __, value: any) => AST.Const(identifier.value, value) },
+  { left: $VariableDeclaration, right: [$KeywordConst, $Identifier, $KeywordEquals, $Expression],
+    resolver: (_, identifier: $Identifier, __, value: any) => AST.Const(identifier.value, value) },

-  { left: t.$LinkStatement, right: [t.$KeywordLink, t.$Identifier], resolver: (_, identifier: t.$Identifier) => AST.Link(identifier.value) },
+  { left: $LinkStatement, right: [$KeywordLink, $Identifier], resolver: (_, identifier: $Identifier) => AST.Link(identifier.value) },

 ]

-const grammar = new Grammar(ps, t.$Program);
+const grammar = new Grammar(ps, $Program);

 export default grammar;
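
To make the productions concrete, here is a hypothetical disco snippet (pieced together from constructs that appear elsewhere in this diff) annotated with the nonterminal each line reduces to; a sketch, not a file from the repo:

```ts
// Hypothetical input; the per-line annotations are assumptions.
// 'link log'                -> $LinkStatement
// 'const a = "Hello World"' -> $VariableDeclaration (via $Expression/$String)
// 'log(a)'                  -> $InvocationExpression over a $VariableReference
const source = 'link log\nconst a = "Hello World"\nlog(a)\n';
```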

tokenizer.ts
@@ -0,0 +1,85 @@
+import * as chalk from 'chalk';
+import { readFileSync, writeFileSync } from 'fs';
+import { $Identifier, $KeywordConst, $KeywordEquals, $KeywordLink, $KeywordLParen, $KeywordRParen, $Newline, $String } from './grammar';
+
+
+const keywords = new Map([
+  ['=', $KeywordEquals],
+  ['(', $KeywordLParen],
+  [')', $KeywordRParen],
+  ['link', $KeywordLink],
+  ['const', $KeywordConst],
+]);
+
+export function tokenize(string) {
+  let inString = false;
+  let escaping = false;
+  let tokens = [];
+  let token = '';
+  // let line = 1;
+  // let col = 1;
+  // const newline = () => (col = 1, line ++);
+  // const nextColumn = () => line ++;
+  const resetToken = () => token = '';
+  const addToken = (_token?) => {
+    if(_token) {
+      token = _token;
+    }
+    if(token.trim() !== '') {
+      if(keywords.has(token)) {
+        const kwTokenClass = keywords.get(token);
+        tokens.push(new kwTokenClass(0, 0, token));
+      } else if (isStringDelim(token[0]))
+        tokens.push(new $String(0, 0, token.substring(1, token.length - 1)));
+      else if (token === 'NEWLINE')
+        tokens.push(new $Newline(0, 0, token))
+      else
+        tokens.push(new $Identifier(0, 0, token));
+      resetToken();
+    }
+  }
+  // let _line = line;
+  // let _col = col;
+
+  const isWhitespace = (char) => [' ', '\n', '\t', '\r'].includes(char);
+  const isNewline = (char) => char === '\n';
+  const isSingleCharToken = (char) => ['(', ')', '='].includes(char);
+  const isStringDelim = (char) => ["'", '"'].includes(char);
+  const isEscapeChar = (char) => char === '\\';
+  const escape = (char) => (char === 'n' ? '\n'
+    : char === 't' ? '\t'
+    : char === 'r' ? '\r' : char)
+
+  for (const char of string) {
+    if(isNewline(char)) {
+      // newline();
+      addToken();
+      // only add newlines if we've actually started tokens...
+      if(tokens.length > 0)
+        addToken('NEWLINE')
+    } else if (escaping) {
+      token += escape(char)
+      escaping = false;
+    } else if (isStringDelim(char)) {
+      token += char;
+      inString = !inString;
+    } else if (inString) {
+      if(isEscapeChar(char)) {
+        escaping = true;
+      } else {
+        token += char
+      }
+    } else if(isSingleCharToken(char)) {
+      addToken();
+      addToken(char);
+    } else if(isWhitespace(char)) {
+      addToken();
+    } else {
+      token += char;
+    }
+    // if(!isNewline(char))
+    //   nextColumn();
+  }
+
+  return tokens;
+}
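
A quick usage sketch for the new `tokenize` (not part of the diff; the expected token sequence is inferred from the code above, and the token constructors take line, column, value with line/column currently stubbed to 0):

```ts
import { tokenize } from './tokenizer';

// 'link' and 'const' hit the keywords map; the quoted string becomes a
// $String with its delimiters stripped; '=' is a single-char token; a
// newline token is only emitted once at least one token has been produced.
const tokens = tokenize('link log\nconst a = "Hello World"\n');
console.log(tokens.map(t => t.constructor.name).join(' '));
// -> $KeywordLink $Identifier $Newline $KeywordConst $Identifier $KeywordEquals $String $Newline
```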

@@ -26,12 +26,8 @@ export default new Grammar([
   { left: t.$Line, right: [t.$Global, t.$Identifier],
     resolver: (_, {value}) => ` ${ansi(...keywordColor).bold('global')} ${ansi(...identifierColor)(value)}` },
   { left: t.$Line, right: [t.$Identifier, t.$Colon], resolver: ({value}) => `${ansi(...identifierColor)(value)}:` },
-  { left: t.$Line, right: [t.$Bits, t.$Number], resolver: (_, n) => `${ansi(...keywordColor).bold('bits')} ${ansi(...numberColor)(n.value)}`},
-  { left: t.$Line, right: [t.$Default, t.$Rel], resolver: () => `${ansi(...keywordColor).bold('default')} ${ansi(...keywordColor).bold('rel')}`},

   // actual instructions
-  { left: t.$Line, right: [t.$Push, t.$DataSize, t.$LBracket, t.$Rel, t.$Identifier, t.$RBracket],
-    resolver: (_, size, __, ___, identifier) => ` ${ansi(...instructionColor)('push')} ${size} ${ansi(...pointerColor)('[')}${ansi(...keywordColor).bold('rel')} ${ansi(...identifierColor)(identifier.value)}${ansi(...pointerColor)(']')}` },
   { left: t.$Line, right: [t.$Push, t.$Value], resolver: (_, v) => ` ${ansi(...instructionColor)('push')} ${v}` },
   { left: t.$Line, right: [t.$Pop, t.$Value], resolver: (_, v) => ` ${ansi(...instructionColor)('pop')} ${v}` },
   { left: t.$Line, right: [t.$Cmp, t.$Register, t.$Comma, t.$Value],
@@ -56,10 +52,5 @@ export default new Grammar([
   { left: t.$Value, right: [t.$Identifier], resolver: (v) => ansi(...identifierColor)(v.value) },

   { left: t.$CompoundString, right: [t.$Number], resolver: (n) => ansi(...numberColor)(n.value) },
-  { left: t.$CompoundString, right: [t.$Number, t.$Comma, t.$CompoundString], resolver: (n, _, ns) => ansi(...numberColor)(n.value) + ',' + ns },
-
-  { left: t.$DataSize, right: [t.$Word], resolver: (v) => ansi(...keywordColor).bold(v.value) },
-  { left: t.$DataSize, right: [t.$DWord], resolver: (v) => ansi(...keywordColor).bold(v.value) },
-  { left: t.$DataSize, right: [t.$QWord], resolver: (v) => ansi(...keywordColor).bold(v.value) },
-  { left: t.$DataSize, right: [t.$OWord], resolver: (v) => ansi(...keywordColor).bold(v.value) },
+  { left: t.$CompoundString, right: [t.$Number, t.$Comma, t.$CompoundString], resolver: (n, _, ns) => ansi(...numberColor)(n.value) + ',' + ns }
 ], t.$Program);

util/asm/tokenizer.ts
@@ -4,58 +4,28 @@ import {
   $Newline,
 } from "./../../earley";

-const asmTokenizer = createTokenizer([
-  // whitespaces
-  [ /^[\r\t ]{1,}/, null],
-  [ /^\n/, $Newline],
-
-  // keywords
-  [ /^section/, tokens.$Section],
-  [ /^db/, tokens.$Db],
-  [ /^global/, tokens.$Global],
-  [ /^bits/, tokens.$Bits],
-  [ /^default/, tokens.$Default],
-  [ /^rel/, tokens.$Rel],
-  [ /^word/, tokens.$Word],
-  [ /^dword/, tokens.$DWord],
-  [ /^qword/, tokens.$QWord],
-  [ /^oword/, tokens.$OWord],
-
-  // punctuation
-  [ /^:/, tokens.$Colon],
-  [ /^,/, tokens.$Comma],
-  [ /^\[/, tokens.$LBracket],
-  [ /^\]/, tokens.$RBracket],
-  [ /^-/, tokens.$Minus],
-
-  // instructions
-  [ /^mov/, tokens.$Mov],
-  [ /^push/, tokens.$Push],
-  [ /^pop/, tokens.$Pop],
-  [ /^syscall/, tokens.$Syscall],
-  [ /^ret/, tokens.$Ret],
-  [ /^je/, tokens.$Je],
-  [ /^jmp/, tokens.$Jmp],
-  [ /^cmp/, tokens.$Cmp],
-  [ /^inc/, tokens.$Inc],
-
-  // pseudo-instructions
-  [ /^call/, tokens.$Call],
-
-  // 8 bit general purpose registers...
-  [ /^(al|ah|bl|bh|cl|ch|dl|dh)/, tokens.$Register ],
-  // 16 bit general purpose registers...
-  [ /^(ax|bx|cx|dx)/, tokens.$Register ],
-  // 32 bit general purpose registers...
-  [ /^(eax|ebx|ecx|edx)/, tokens.$Register ],
-  // 64 bit general purpose registers...
-  [ /^(rax|rbx|rcx|rdx)/, tokens.$Register ],
-  // other registers, idk.
-  [ /^(rbp|rsp|rdi|rsi)/, tokens.$Register],
-
-  // user-defined
-  [ /^[0-9]{1,}/, tokens.$Number],
-  [ /^0x[0-9A-Fa-f]{1,}/, tokens.$Number],
-  [ /^[A-Za-z._][A-Za-z_]{0,}/, tokens.$Identifier]
-])
-export default asmTokenizer;
+export default createTokenizer([
+  { match: /^[\r\t ]{1,}$/, token: null },
+  { match: 'section', token: tokens.$Section },
+  { match: 'db', token: tokens.$Db },
+  { match: 'global', token: tokens.$Global },
+  { match: '\n', token: $Newline },
+  { match: ':', token: tokens.$Colon },
+  { match: ',', token: tokens.$Comma },
+  { match: '[', token: tokens.$LBracket },
+  { match: ']', token: tokens.$RBracket },
+  { match: '-', token: tokens.$Minus },
+  { match: 'mov', token: tokens.$Mov },
+  { match: 'push', token: tokens.$Push },
+  { match: 'pop', token: tokens.$Pop },
+  { match: 'call', token: tokens.$Call },
+  { match: 'syscall', token: tokens.$Syscall },
+  { match: 'ret', token: tokens.$Ret },
+  { match: 'je', token: tokens.$Je },
+  { match: 'jmp', token: tokens.$Jmp },
+  { match: 'cmp', token: tokens.$Cmp },
+  { match: 'inc', token: tokens.$Inc },
+  { match: /^[0-9]{1,}$/, token: tokens.$Number },
+  { match: /^(rbp|rsp|rax|rcx|rbx|rdx|rdi|rsi|al|bl|cl|dl|ah|bh|ch|dh|ax|bx|cx|dx|eax|ebx|ecx|edx)$/, token: tokens.$Register },
+  { match: /^[A-Za-z._][A-Za-z_]{0,}$/, token: tokens.$Identifier },
+])
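
Note the anchors: the old table tested prefix regexes against the remaining input, while the new incremental tokenizer (see the generalTokenizer hunk below) tests the accumulated token as a whole, hence the `$` anchors and bare-string matches. An illustration, not from the repo:

```ts
/^push/.test('pushy');  // true: a prefix regex also fires inside longer identifiers
/^push$/.test('pushy'); // false: the anchored form only accepts the exact token
```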

util/asm/tokens.ts
@@ -21,13 +21,6 @@ export class $RBracket extends Terminal { }
 export class $Comma extends Terminal { }
 export class $Colon extends Terminal { }
 export class $Minus extends Terminal { }
-export class $Bits extends Terminal { }
-export class $Default extends Terminal { }
-export class $Rel extends Terminal { }
-export class $Word extends Terminal { }
-export class $DWord extends Terminal { }
-export class $QWord extends Terminal { }
-export class $OWord extends Terminal { }

 // varying tokens
 export class $Identifier extends Terminal { }
@@ -40,5 +33,4 @@ export class $Line extends NonTerminal { }
 export class $PointerDereference extends NonTerminal { }
 export class $Program extends NonTerminal { }
 export class $CompoundString extends NonTerminal { }
 export class $Value extends NonTerminal { }
-export class $DataSize extends NonTerminal { }

@@ -0,0 +1,3 @@
+export function logASM(asm: string) {
+
+}

util/disco/tokenizer.ts
@@ -1,9 +0,0 @@
-import { $Newline } from "../../earley";
-import { createTokenizer } from "../generalTokenizer";
-import * as t from './tokens';
-
-export default createTokenizer([
-  [ /^[\r\t ]{1,}/, null],
-  [ /^\n/, $Newline],
-  [/[a-zA-Z][A-Za-z0-9]{0,}/, t.$Identifier],
-])

util/disco/tokens.ts
@@ -1,18 +0,0 @@
-import { NonTerminal, Terminal } from "../../earley";
-
-export class $KeywordLink extends Terminal { }
-export class $KeywordEquals extends Terminal { }
-export class $KeywordLParen extends Terminal { }
-export class $KeywordRParen extends Terminal { }
-export class $KeywordConst extends Terminal { }
-
-export class $String extends Terminal {}
-export class $Identifier extends Terminal {}
-
-export class $Program extends NonTerminal { }
-export class $Statement extends NonTerminal { }
-export class $LinkStatement extends NonTerminal { }
-export class $VariableDeclaration extends NonTerminal { }
-export class $Expression extends NonTerminal { }
-export class $InvocationExpression extends NonTerminal { }
-export class $VariableReference extends NonTerminal { }

util/generalTokenizer.ts
@@ -1,83 +1,72 @@
-import { Terminal, TerminalTokenClass } from "../earley";
-import { Matcher } from "./regex";
-
-type TokenMatcher = [ RegExp, TerminalTokenClass ];
-type Index = number;
-
-interface Match {
-  regex: RegExp;
-  length: number;
-  tokenClass: TerminalTokenClass;
-  matchedString: string;
-}
-
-// this is kinda bullshit lol exec is a dumb method.
-function getFirstMatch(r: RegExp | Matcher, str: string): [Index, string] {
-  if (r instanceof RegExp) {
-    let matches = str.match(r);
-    if(matches === null) return [-1, ''];
-    return [matches.index, matches[0]];
-  }
-}
-
-const getMatchesFromTokenMatcher =
-  (str: string) =>
-  ([regex, tokenClass]: TokenMatcher): Match =>
-  {
-    const [index, match] = getFirstMatch(regex, str);
-    if(index === -1) return null;
-    return {
-      regex,
-      tokenClass,
-      length: match.length,
-      matchedString: match
-    }
-  }
-
-const advanceLC = (l: number, c: number, str: string) => {
-  for(const char of str) {
-    c ++;
-    if(char === '\n') {
-      l ++;
-      c = 1;
-    }
-  }
-  return [l, c];
-}
+import { TerminalTokenClass } from "../earley";
+import { inspect } from 'util';
+
+interface TokenMatcher {
+  match: RegExp | string,
+  token: TerminalTokenClass
+}

 export function createTokenizer(tokenMap: TokenMatcher[]) {
-  return function tokenize(str: string, l = 1, c = 1): Terminal[] {
-    const possibleMatches: Match[] = tokenMap
-      .map(getMatchesFromTokenMatcher(str))
-      .filter(v => !!v);
-
-    const longestLength = possibleMatches
-      .map(v => v.length)
-      .reduce((a, v) => a > v ? a : v, -Infinity);
-
-    const longestMatches = possibleMatches
-      .filter(v => v.length === longestLength);
-
-    console.assert(longestMatches.length > 0, 'No token matches found');
-    if(longestMatches.length === 0) process.exit(1);
-
-    const {tokenClass, matchedString} = longestMatches[0];
-    const length = matchedString.length;
-    const rest = str.substring(length);
-
-
-    const token = tokenClass ? new tokenClass(l, c, matchedString) : null;
-    if(rest === '') return [ token ];
-
-    [l, c] = advanceLC(l, c, str);
-    if(tokenClass) {
-      return [
-        new tokenClass(l, c, matchedString),
-        ...tokenize(rest, l, c)
-      ]
-    }
-    return token ? [token, ...tokenize(rest, l, c)] : tokenize(rest, l, c);
-  }
-}
+  return function tokenize(str: string) {
+    let tokens = [];
+    let token = '';
+    let line = 1, column = 0;
+    for(let i = 0; i < str.length; i ++) {
+      const char = str[i];
+      const lookahead = (i < str.length - 1 ? str[i + 1] : null)
+      column++;
+      token += char;
+
+      for(const {match: matcher, token: tokenClass} of tokenMap) {
+        if(typeof matcher === 'string') {
+          if(matcher === token) {
+            if(tokenClass !== null) {
+              tokens.push(new tokenClass(line, column - token.length + 1, token));
+            }
+            token = '';
+          } else {
+            // dw about it
+          }
+        } else {
+          // matcher is regex...
+          // * note: this only tests if token contains a match, not that it _is_ a match
+          if(matcher.test(token)) {
+            if(lookahead) {
+              if(!matcher.test(token + lookahead)) {
+                // the next character would not match, so this must be the match.
+                // ! PS: it is possible that even though this would no longer
+                // ! match, another matcher could still match more.
+                // ! in those cases, we would want to expand on this logic
+                // ! to only match if there are no matches for any matcher
+                // ! in the lookahead.
+                // ! in practice this means tracking all possible non lookahead
+                // ! matches, then testing them for their lookahead afterwards
+                // ! in another loop, and only tokenizing if you have only one
+                // ! option, and that option will fail on the lookahead.
+                if(tokenClass !== null) {
+                  tokens.push(new tokenClass(line, column - token.length + 1, token));
+                }
+                token = '';
+              } else {
+                // the lookahead matches this too, so we should probably hold off
+                // on tokenizing it...
+              }
+            } else {
+              if(tokenClass !== null) {
+                tokens.push(new tokenClass(line, column - token.length + 1, token));
+              }
+              token = '';
+            }
+          }
+        }
+      }
+
+      if(char === '\n') {
+        line ++;
+        column = 0;
+      }
+    }
+
+    return tokens;
+  }
 }
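
The `! PS` comment above describes the remaining gap: one matcher may stop at a boundary where another matcher could still grow the token. A hedged sketch of the fix it proposes, emitting only when no matcher would also accept the token extended by the lookahead; the function name and shapes here are illustrative, not from the repo:

```ts
// Illustrative only: decide whether the accumulated token should be emitted.
function shouldEmit(token: string, lookahead: string | null,
                    matchers: (RegExp | string)[]): boolean {
  const accepts = (m: RegExp | string, s: string) =>
    typeof m === 'string' ? m === s : m.test(s);
  // no matcher accepts the token yet: keep accumulating
  if (!matchers.some(m => accepts(m, token))) return false;
  // end of input: emit whatever we have
  if (lookahead === null) return true;
  // hold off while any matcher still accepts the extended token
  return !matchers.some(m => accepts(m, token + lookahead));
}
```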

util/regex.ts
@@ -1,176 +0,0 @@
-type Match = {
-  offset: number;
-  length: number;
-  text: string;
-  original: string;
-}
-
-const match = (offset: number, length: number, text: string, original: string): Match => {
-  return { offset, length, text, original };
-}
-
-export type Matcher = (str: string) => Match[]
-
-export const matchChar = (char: string): Matcher => {
-  const matcher = (test: string) => {
-    return test[0] === char[0] ? [match(0, 1, test[0], test)] : []
-  }
-  matcher.toString = () => {
-    return char;
-  }
-  return matcher;
-}
-
-export const matchCharClass = (chars: string[]): Matcher => {
-  const matcher = (test: string) => {
-    return chars.includes(test[0]) ? [match(0, 1, test[0], test)] : []
-  }
-  matcher.toString = () => {
-    return '[' + chars.join('') + ']';
-  }
-  return matcher;
-}
-
-const combineMatches = (a: Match, b: Match): Match => {
-  return match(
-    Math.min(a.offset, b.offset),
-    a.length + b.length,
-    a.text + b.text,
-    a.original.length > b.original.length ? a.original : b.original
-  )
-}
-
-export const matchSequence = (matcherA: Matcher, matcherB: Matcher): Matcher => {
-  const matcher = (test: string) => {
-    const matches = [];
-    for (const match of matcherA(test)) {
-      const rest = test.substring(match.length);
-      for (const restMatch of matcherB(rest)) {
-        matches.push(combineMatches(match, restMatch));
-      }
-    }
-    return matches;
-  }
-  matcher.toString = () => {
-    return matcherA.toString() + matcherB.toString();
-  }
-  return matcher;
-}
-
-const repeatMatcher = (matcher: Matcher, test: string, n: number): Match[] => {
-  if(n === 0) {
-    return [match(0, 0, '', test)];
-  }
-  const matches = matcher(test);
-  if(n === 1) {
-    return matches;
-  }
-  return matches.map(match => {
-    const rest = match.original.substring(match.length);
-    return repeatMatcher(matcher, rest, n - 1).map(nextMatch => combineMatches(match, nextMatch));
-  }).flat();
-}
-
-// this logic sucks lol
-// really you should just keep matching until you
-// have no more characters or you hit the match limit.
-// like this shit increases O by 2 on each nested call...
-// TODO /\ \/ /\ \/ /\ \/ /\ \/ /\ \/ /\ \/ /\ \/ /\
-export const matchMany = (matcherA: Matcher, min = 1, max = Infinity): Matcher => {
-  const matcher = (test: string) => {
-    const rmatches: Match[] = [];
-    const limitedMax = Math.min(max, test.length);
-    for(let c = min; c <= limitedMax; c ++) {
-      const matches = repeatMatcher(matcherA, test, c);
-      rmatches.push(...matches);
-    }
-    return rmatches;
-  }
-  matcher.toString = () => {
-    return '(' + (matcherA.toString()) + '){' + (min === 0 ? '' : min) + ',' + (max === Infinity ? '' : max) + '}';
-  }
-  return matcher;
-}
-
-// variable names regex, theory...
-
-const matchers = [
-  matchChar('a'),
-  matchCharClass(['a', 'b', 'c']),
-  matchSequence(
-    matchChar('a'),
-    matchCharClass(['a', 'b', 'c'])
-  ),
-  matchMany(
-    matchCharClass(['a', 'b', 'c'])
-  ),
-  matchMany(
-    matchCharClass(['a', 'b', 'c']),
-    1,
-    1
-  ),
-];
-
-const tests = [
-  'a',
-  'b',
-  'c',
-  'd',
-  'ab',
-  'bc',
-  'cd',
-  'da',
-]
-
-console.clear();
-
-const logMatches = (ms: Match[]) => {
-  for(const match of ms) {
-    console.log(
-      ' '.repeat(8) +
-      chalk.white(match.original.substring(0, match.offset)) +
-      chalk.green(match.text) +
-      chalk.white(match.original.substring(match.offset + match.length))
-    );
-  }
-}
-
-const Y = true;
-const N = false;
-const testMatrix = [
-  [Y, N, N, N, N, N, N, N],
-  [Y, Y, Y, N, N, N, N, N],
-  [N, N, N, N, Y, N, N, N],
-  [Y, Y, Y, N, Y, Y, N, N],
-  [Y, Y, Y, N, N, N, N, N]
-]
-import * as chalk from 'chalk';
-// dirty levels off the CHARTS
-let i = 0, j = 0, p = 0, f = 0;
-for (const matcher of matchers) {
-  j = 0;
-  for (const testString of tests) {
-    const matches = matcher(testString).filter(match => match.length === testString.length);
-    if (matches.length > 0 === testMatrix[i][j]) {
-      p ++;
-    } else {
-      f ++;
-      console.log(
-        chalk.red('[ FAIL ]'),
-        chalk.ansi256(143)('/' + matcher.toString() + '/'),
-        'incorrectly returned',
-        matches.length,
-        'match' + (matches.length !== 1 ? 'es' : '') + ' for',
-        testString,
-      );
-      logMatches(matches);
-      console.log('')
-    }
-    j++;
-  }
-  i++
-}
-console.log('' + p + ' test' + (p !== 1 ? 's' : '') + ' passed.')
-console.log('' + f + ' test' + (f !== 1 ? 's' : '') + ' failed.')
-process.exit(f);

@@ -6,10 +6,10 @@ class $Plus extends Terminal { }
 class $Newline extends Terminal { }

 const tokenizer = createTokenizer([
-  [ /^[0-9]{1,}$/, $Number ],
-  [ /^[\r\t ]{1,}$/, null ],
-  [ /\n/, $Newline ],
-  [ /+/, $Plus ],
+  { match: /^[0-9]{1,}$/, token: $Number },
+  { match: /^[\r\t ]{1,}$/, token: null },
+  { match: '\n', token: $Newline },
+  { match: '+', token: $Plus },
 ])

 console.log(tokenizer("5 + \n 6 ").map(v => v.toString()).join(' '));

todo.md
@@ -1,42 +1,13 @@
 # Todo List

-- [x] colorize the assembly output
-- [x] create generalTokenizer to make tokenization generic
-- [ ] rewrite disco tokenizer to the new generalTokenizer
-- [ ] explore defining non terminals in a grammar with just a string
-  - possibly using tagged template strings??
-- [ ] add an EOF token to earley, and yknow, add it to the tokenizer.
-- [ ] add number support in consts
-- [ ] add comment support
-- [ ] add fixed length array support
-- [ ] organize AST elements into classes
-- [ ] better logging of the AST
-- [ ] optionally artifically slow down compilation (for fun)
-- [ ] implement functions
-- [ ] implement some basic maths operations
-- [ ] implement multi-argument invocations
-- [ ] implement return values
-- [ ] write a regex compiler
-- [ ] write log in disco. creat a library for just doing syscalls. the rest can be done in disco
-
-# Changelog
-
-- fixed macos compilation to use relative addressing (i think)
-- fixed a bug in the general tokenizer that failed to match some tokens properly.
-
----
-
-- create generalized tokenizer
-- implement assembly language grammar for syntax highlighting
-- create a vscode extension for syntax highlighting
-
----
-
-- compile disco code to assembly as POC
-- create an AST for disco code
-- implement earley grammar for disco including:
-  - linking library functions
-  - calling functions
-  - string literals
-  - string variables
-- created earley parser
+[x] colorize the assembly output
+[ ] rewrite disco tokenizer to the new generalTokenizer
+[ ] add number support
+[ ] add comment support
+[ ] add fixed length array support
+[ ] organize AST elements into classes
+[ ] better logging of the AST
+[ ] optionally artifically slow down compilation (for fun)
+[ ] implement some basic maths operations
+[ ] implement multi-argument invocations
+[ ] implement return values