it is complete. well, the Earley parser and basic grammar are, anyway.

canary
Neon 2022-03-12 11:27:25 -05:00
parent a693c9673e
commit d47e0ee781
16 changed files with 1456 additions and 156 deletions

10
ast.js

@@ -1,10 +0,0 @@
module.exports = {
Body(statements) { return { type: 'body', value: statements } },
Link(identifier) { return { type: 'link', value: identifier } },
Invocation(identifier, ...args) { return { type: 'invo', value: identifier, args } },
Const(name, value) { return { type: 'const', value, name } },
Int(n) { return { type: 'int', value: n } },
String(s) { return { type: 'string', value: s } },
Variable(name, value) { return { type: 'var', value, name } },
VariableReference(name) { return { type: 'ref', value: name } },
}

bytecode.json

@@ -1,21 +1,73 @@
[
{
"type": "KEYWORD",
"value": "LINK"
},
{
"type": "IDENTIFIER",
"value": "log1"
},
{
"type": "NEWLINE"
},
{
"type": "KEYWORD",
"value": "LINK"
},
{
"type": "IDENTIFIER",
"value": "log2"
}
"",
"NEWLINE",
"link",
"log1",
"NEWLINE",
"link",
"log2",
"NEWLINE",
"",
"NEWLINE",
"",
"NEWLINE",
"",
"NEWLINE",
"link",
"log",
"NEWLINE",
"const",
"test",
"",
"=",
"",
"\"Hello\"",
"NEWLINE",
"const",
"",
"",
"",
"test2",
"",
"=",
"\"Hello2\"",
"NEWLINE",
"const",
"test3",
"",
"=",
"",
"'Hello'",
"NEWLINE",
"const",
"a",
"=",
"\"5\"",
"NEWLINE",
"log",
"(",
"test",
")",
"",
"NEWLINE",
"log",
"(",
"\"World\"",
")",
"",
"NEWLINE",
"log",
"(",
"\"Hello\n \"Wor(l)d\"\n\\o/\"",
")",
"",
"NEWLINE",
"",
"NEWLINE",
"",
"NEWLINE",
"",
"NEWLINE",
"",
"NEWLINE"
]

disco.disco

@@ -1,8 +1,4 @@
link log1
link log2
link log
const test = "Hello"


@@ -1,26 +0,0 @@
#!/usr/bin/env node
const AST = require('./ast.js');
const compile = require('./compiler.js');
const myProgram = AST.Body([
AST.Variable('test1', AST.String('This String is Contained in a variable')),
AST.Variable('test2', AST.String('This is a second string in a variable')),
AST.Link('log'),
AST.Invocation('log', AST.String('hello')),
AST.Invocation('log', AST.String('world')),
AST.Invocation('log', AST.VariableReference('test1')),
AST.Invocation('log', AST.VariableReference('test2')),
AST.Invocation('log', AST.VariableReference('test1')),
AST.Invocation('log', AST.VariableReference('test2')),
]);
const asmFile = compile(myProgram)
try {
require('fs').writeFileSync('out.asm', asmFile);
require('child_process').execSync('nasm -f elf64 out.asm -o out.o', { stdio: 'inherit' });
require('child_process').execSync('ld out.o -o out', { stdio: 'inherit' });
require('child_process').execSync('./out', { stdio: 'inherit' });
} catch (e) {
process.exit(1);
}

BIN
disco_test 100755

Binary file not shown.

51
disco_test.asm 100644

@@ -0,0 +1,51 @@
section .data
VVQDDBDZ db 72,101,108,108,111,0
EWXBIBSR db 72,101,108,108,111,50,0
PJQDTHUC db 72,101,108,108,111,0
ECIATSPU db 53,0
GTZCFAMK db 87,111,114,108,100,0
YDHYSXWS db 72,101,108,108,111,10,32,34,87,111,114,40,108,41,100,34,10,92,111,47,0
section .text
global _start
_start:
push rbp
mov rbp, rsp
push VVQDDBDZ
push EWXBIBSR
push PJQDTHUC
push ECIATSPU
mov rdi, [rbp - 8]
call _log
mov rdi, GTZCFAMK
call _log
mov rdi, YDHYSXWS
call _log
mov rsp, rbp
pop rbp
mov rax, 60
mov rdi, 0
syscall
_log:
push rdi
mov rbx, 0
_log_loop:
mov cl, [rdi]
cmp cl, 0
je _log_loop_end
inc rdi
inc rbx
jmp _log_loop
_log_loop_end:
mov rdx, rbx
mov rax, 1
mov rdi, 1
pop rsi
syscall
push 10
mov rax, 1
mov rdi, 1
mov rsi, rsp
mov rdx, 1
syscall
pop rdi
ret
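
The db lines above are NUL-terminated ASCII strings under random eight-letter labels; VVQDDBDZ, for instance, spells "Hello". A quick Node check (illustration only, not part of the commit):

[...'Hello'].map(c => c.charCodeAt(0)).concat(0)
// -> [72, 101, 108, 108, 111, 0], the VVQDDBDZ bytes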

1125
output.ansi 100644

File diff suppressed because it is too large

package.json

@@ -9,6 +9,7 @@
"dev": "yarn build && yarn start"
},
"dependencies": {
"@types/node": "^17.0.21",
"chalk": "3",
"typescript": "^4.6.2"
}

15
src/ast.ts 100644

@@ -0,0 +1,15 @@
export const AST = {
Body(statements: any[]): any {
return {
type: 'body',
value: statements
}
},
Link(identifier: any): any { return { type: 'link', value: identifier } },
Invocation(identifier: any, ...args: any[]): any { return { type: 'invo', value: identifier, args } },
Const(name: any, value: any): any { return { type: 'const', value, name } },
Int(n: any): any { return { type: 'int', value: n } },
String(s: any): any { return { type: 'string', value: s } },
Variable(name: any, value: any): any { return { type: 'var', value, name } },
VariableReference(name: any): any { return { type: 'ref', value: name } },
}
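
These constructors build plain tagged objects, so a statement like const test = "Hello" becomes a small nested tree. For example (a sketch, not part of the diff):

AST.Body([AST.Const('test', AST.String('Hello'))])
// -> { type: 'body', value: [
//      { type: 'const', value: { type: 'string', value: 'Hello' }, name: 'test' } ] }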

src/compiler.ts

@@ -5,8 +5,8 @@ const rname = () => (new Array(8).fill(''))
]).join('');
const linkables = {
log: require('./linkables/log.js'),
exit: require('./linkables/exit.js')
log: require('../linkables/log.js'),
exit: require('../linkables/exit.js')
};
const callingConvention = {
@@ -109,7 +109,7 @@ function compileStatement(item) {
compileInvocation(item.value, ...item.args);
break;
}
case 'var': {
case 'const': {
compileVariable(item.name, item.value);
break;
}
@@ -119,11 +119,9 @@ function compileStatement(item) {
}
}
function compile(tree) {
export function compile(tree) {
for(const item of tree.value) {
compileStatement(item);
}
return sections.data() + '\n' + sections.text();
}
module.exports = compile;
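
The rname helper is truncated in the hunk above; judging from the eight-letter uppercase labels in disco_test.asm, its body presumably looks something like this (hypothetical reconstruction, not part of the commit):

// hypothetical rname body: eight random A-Z characters, e.g. 'VVQDDBDZ'
const rname = () => new Array(8).fill('')
    .map(() => String.fromCharCode(65 + Math.floor(Math.random() * 26)))
    .join('');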

51
src/disco.ts 100755

@@ -0,0 +1,51 @@
#!/usr/bin/env node
// const AST = require('../ast.js');
// const compile = require('../compiler.js');
import { readFileSync } from "fs";
import { compile } from "./compiler";
import grammar, { $Newline } from "./grammar";
import { tokenize } from "./tokenizer";
console.log();
console.log('=== Original ===');
const fileContents = readFileSync('./disco.disco').toString('utf-8');
console.log(fileContents)
console.log('=== Tokenization ===');
const tokens = tokenize(fileContents);
for(const token of tokens) {
process.stdout.write(token.toString() + ' ');
if(token instanceof $Newline) console.log();
}
console.log();
console.log('=== Parsing ===');
const ast = grammar.solveFor(tokens)[0];
console.log();
console.log('=== AST ===');
console.dir(ast, {
depth: Infinity
});
const asmFile = compile(ast)
try {
console.log();
console.log('=== ASM ===');
console.log(asmFile);
require('fs').writeFileSync('disco_test.asm', asmFile);
console.log();
console.log('=== nasm ===');
require('child_process').execSync('nasm -f elf64 disco_test.asm -o disco_test.o', { stdio: 'inherit' });
console.log('=== ld ===');
require('child_process').execSync('ld disco_test.o -o disco_test', { stdio: 'inherit' });
console.log('=== execute ===');
require('child_process').execSync('./disco_test', { stdio: 'inherit' });
} catch (e) {
process.exit(1);
}

src/earley.ts

@@ -3,7 +3,7 @@ import * as chalk from 'chalk';
const rgb2ansi = (r: number, g: number, b: number) => r * 36 + g * 6 + b + 16
const ansi = (r: number, g = r, b = r) => chalk.ansi256(rgb2ansi(r, g, b));
abstract class Token {
export abstract class Token {
l: number;
c: number;
static terminal: boolean;
@@ -31,8 +31,9 @@ abstract class Token {
ansi(2)(')')
}
}
class NonTerminal extends Token { static terminal: false = false };
class Terminal extends Token { static terminal: true = true };
export class NonTerminal extends Token { static terminal: false = false };
export class Terminal extends Token { static terminal: true = true };
function isTerminal(tokenClass: TokenClass): tokenClass is TerminalTokenClass {
return tokenClass.terminal;
@@ -46,14 +47,6 @@ type TerminalTokenClass = { new(...args: any[]) : Terminal, terminal: true }
type NonTerminalTokenClass = { new(...args: any[]) : NonTerminal, terminal: false }
type TokenClass = TerminalTokenClass | NonTerminalTokenClass;
// class Identifier extends Token { constructor(l, c, value) { super(l, c); this.value = value; } }
class $Number extends Terminal { value: string; constructor(l: number, c: number, value: string) { super(l, c); this.value = value; } }
class $Plus extends Terminal { }
class $Times extends Terminal { }
class $Term extends NonTerminal { }
class $Poop extends NonTerminal { }
class $Addition extends NonTerminal { }
function getTokenClassFromToken(token: Token): TokenClass {
return token.constructor as TokenClass;
}
@@ -88,13 +81,13 @@ class TimeMachine<T> {
}
}
interface Production {
export interface Production {
left: TokenClass;
right: TokenClass[];
// resolver: (...args: any[]) => any;
resolver?: (...args: any[]) => any;
}
class Grammar {
export class Grammar {
private productions: Production[];
private startingSymbol: NonTerminalTokenClass;
@@ -108,16 +101,17 @@ class Grammar {
const possibleStartingProductions = getProductionsForTokenClass(this.productions, this.startingSymbol)
for(const production of possibleStartingProductions) {
state.current.partialMatches.push(new PartialMatch(production, 0, state.currentIndex));
state.current.partialMatches.push(new PartialMatch(production, 0, state.currentIndex, []));
}
// expand all non terminals here again
const expand = (partial: PartialMatch) => {
if(partial.complete) {
const resolvedData = partial.resolve();
const pastPartials = state.stateByIndex(partial.source).partialMatches;
for(const pastPartial of pastPartials) {
if(pastPartial.nextTokenClass === partial.production.left) {
const newPartial = pastPartial.getAdvancedCopy();
const newPartial = pastPartial.getAdvancedCopy(resolvedData);
expand(newPartial);
state.current.partialMatches.push(newPartial);
}
@@ -128,7 +122,7 @@ class Grammar {
if(isTerminal(nextTokenClass)) return;
const possibleProductions = getProductionsForTokenClass(this.productions, nextTokenClass);
for(const production of possibleProductions) {
const partialMatch = new PartialMatch(production, 0, state.currentIndex);
const partialMatch = new PartialMatch(production, 0, state.currentIndex, []);
expand(partialMatch);
state.current.partialMatches.push(partialMatch)
}
@@ -139,7 +133,7 @@ class Grammar {
// expand all non terminals here
console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + this.startingSymbol.toString());
console.log(state.current.toString(), '\n\n')
console.log(state.current.toString(), '\n')
for(const token of tokens) {
state.newState();
@@ -149,7 +143,7 @@ class Grammar {
if(partialMatch.complete) continue;
// if our current token falls in line with what we need, then yeah, let's do it.
if(token instanceof partialMatch.nextTokenClass) {
state.current.partialMatches.push(partialMatch.getAdvancedCopy());
state.current.partialMatches.push(partialMatch.getAdvancedCopy(token));
}
}
@@ -158,8 +152,18 @@ class Grammar {
state.current.partialMatches.forEach(expand);
state.current.deduplicate()
console.log(state.current.toString(), '\n\n')
console.log(state.current.toString(), '\n')
}
const completedResolutions = [];
for(const partial of state.current.partialMatches) {
if(partial.complete && partial.source === 0) {
completedResolutions.push(partial.resolve());
}
}
return completedResolutions;
}
}
@@ -182,10 +186,12 @@ class PartialMatch {
readonly production: Production;
readonly progress: number = 0;
readonly source: number = 0;
constructor(production: Production, completion: number, source: number) {
readonly resolvedData = [];
constructor(production: Production, completion: number, source: number, resolvedData: any[]) {
this.production = production;
this.progress = completion;
this.source = source;
this.resolvedData = resolvedData;
}
get complete() {
return this.production.right.length === this.progress;
@@ -197,8 +203,15 @@ class PartialMatch {
// if()
// return getFirstTerminalsForTokenClass
// }
getAdvancedCopy() {
return new PartialMatch(this.production, this.progress + 1, this.source);
resolve() {
if('resolver' in this.production) {
return this.production.resolver(...this.resolvedData);
} else {
return this.resolvedData;
}
}
getAdvancedCopy(resolvedData: any) {
return new PartialMatch(this.production, this.progress + 1, this.source, [...this.resolvedData, resolvedData]);
}
toString() {
const rightSide = [];
@@ -208,7 +221,7 @@ class PartialMatch {
rightSide.push(this.production.right[i].toString())
}
if(this.complete) addDot();
return this.production.left.toString() + ansi(2, 2, 2)(' => ') + rightSide.join(' ') + ansi(2, 2, 2)(' (' + this.source + ')')
return this.production.left.toString() + ansi(2, 2, 2)(' => ') + rightSide.join(' ') + ansi(2, 2, 2)(' (' + this.source + ')');
}
}
@@ -237,28 +250,5 @@ class SingleEarleyState {
}
}
const tokens: Token[] = [
new $Number(1, 1, '45'),
new $Plus(1, 3),
new $Number(1, 1, '45'),
new $Times(1, 3),
new $Number(1, 1, '45'),
new $Plus(1, 3),
new $Number(1, 1, '45'),
]
const ps: Production[] = [
{
left: $Term, right: [$Addition, $Times, $Addition]
},
{
left: $Addition, right: [$Number, $Plus, $Number]
},
]
const grammar = new Grammar(ps, $Term);
console.log(grammar.solveFor(tokens));
// console.log(getFirstTerminalsForTokenClass(ps, $Term))
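With the self-test removed, the new resolver field can be exercised the same way; a minimal sketch using the arithmetic token classes deleted above (not part of the commit):

// resolvers receive one argument per right-hand-side symbol, in order
const arithmetic: Production[] = [
    { left: $Addition, right: [$Number, $Plus, $Number],
      resolver: (a: $Number, _: $Plus, b: $Number) => Number(a.value) + Number(b.value) },
];
new Grammar(arithmetic, $Addition).solveFor([
    new $Number(1, 1, '2'), new $Plus(1, 3), new $Number(1, 5, '3'),
]);
// -> [5]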

63
src/grammar.ts 100644

@@ -0,0 +1,63 @@
import { Grammar, NonTerminal, Production, Terminal, Token } from "./earley";
import { AST } from './ast';
export class $KeywordLink extends Terminal { }
export class $KeywordEquals extends Terminal { }
export class $KeywordLParen extends Terminal { }
export class $KeywordRParen extends Terminal { }
export class $KeywordConst extends Terminal { }
export class $String extends Terminal {
value: string;
constructor(l: number, c: number, value: string) {
super(l, c);
this.value = value;
}
}
export class $Identifier extends Terminal {
value: string;
constructor(l: number, c: number, value: string) {
super(l, c);
this.value = value;
}
}
export class $Newline extends Terminal { }
export class $Program extends NonTerminal { }
export class $Statement extends NonTerminal { }
export class $LinkStatement extends NonTerminal { }
export class $VariableDeclaration extends NonTerminal { }
export class $Expression extends NonTerminal { }
export class $InvocationExpression extends NonTerminal { }
export class $VariableReference extends NonTerminal { }
const ps: Production[] = [
{ left: $Program, right: [$Statement], resolver: (s) => !!s ? AST.Body([s]) : AST.Body([]) },
{ left: $Program, right: [$Statement, $Program], resolver: (s, ss) => !!s ? AST.Body([s, ...ss.value]) : ss},
{ left: $Statement, right: [$Newline], resolver: () => false },
{ left: $Statement, right: [$LinkStatement], resolver: a => a },
{ left: $Statement, right: [$VariableDeclaration], resolver: a => a },
{ left: $Statement, right: [$Expression], resolver: a => a },
{ left: $Expression, right: [$String], resolver: (s: $String) => AST.String(s.value) },
{ left: $Expression, right: [$InvocationExpression], resolver: a => a },
{ left: $Expression, right: [$VariableReference], resolver: a => a },
{ left: $VariableReference, right: [$Identifier], resolver: (identifier: $Identifier) => AST.VariableReference(identifier.value) },
{ left: $InvocationExpression, right: [$Identifier, $KeywordLParen, $Expression, $KeywordRParen],
resolver: (identifier: $Identifier, _, arg: any, __) => AST.Invocation(identifier.value, arg) },
{ left: $VariableDeclaration, right: [$KeywordConst, $Identifier, $KeywordEquals, $Expression],
resolver: (_, identifier: $Identifier, __, value: any) => AST.Const(identifier.value, value) },
{ left: $LinkStatement, right: [$KeywordLink, $Identifier], resolver: (_, identifier: $Identifier) => AST.Link(identifier.value) },
]
const grammar = new Grammar(ps, $Program);
export default grammar;
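
End to end, a single link statement resolves to the expected body node. For example, with hand-built tokens at dummy positions, as the tokenizer currently emits (a sketch, not part of the commit):

grammar.solveFor([
    new $KeywordLink(0, 0),
    new $Identifier(0, 0, 'log'),
    new $Newline(0, 0),
])[0];
// -> { type: 'body', value: [ { type: 'link', value: 'log' } ] }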

src/tokenizer.ts

@@ -1,21 +1,17 @@
const chalk = require('chalk');
import * as chalk from 'chalk';
import { readFileSync, writeFileSync } from 'fs';
import { $Identifier, $KeywordConst, $KeywordEquals, $KeywordLink, $KeywordLParen, $KeywordRParen, $Newline, $String } from './grammar';
// const keywords = new Map([
// ['=', 'EQUALS'],
// ['(', 'LPAREN'],
// [')', 'RPAREN'],
// ['link', 'LINK'],
// ['const', 'CONST'],
// ]);
// const Tokens = {
// Keyword(str) { return { type: 'KEYWORD', value: keywords.get(str) } },
// Newline() { return { type: 'NEWLINE' } },
// Identifier(str) { return { type: 'IDENTIFIER', value: str } },
// String(str) { return { type: 'STRING', value: str } }
// }
const keywords = new Map([
['=', $KeywordEquals],
['(', $KeywordLParen],
[')', $KeywordRParen],
['link', $KeywordLink],
['const', $KeywordConst],
]);
function tokenize(string) {
export function tokenize(string) {
let inString = false;
let escaping = false;
let tokens = [];
@@ -25,26 +21,26 @@ function tokenize(string) {
// const newline = () => (col = 1, line ++);
// const nextColumn = () => line ++;
const resetToken = () => token = '';
const addToken = (_token) => {
tokens.push(_token ?? token);
resetToken();
const addToken = (_token?) => {
if(_token) {
token = _token;
}
if(token.trim() !== '') {
if(keywords.has(token)) {
const kwTokenClass = keywords.get(token);
tokens.push(new kwTokenClass(0, 0));
} else if (isStringDelim(token[0]))
tokens.push(new $String(0, 0, token.substring(1, token.length - 1)));
else if (token === 'NEWLINE')
tokens.push(new $Newline(0, 0))
else
tokens.push(new $Identifier(0, 0, token));
resetToken();
}
}
// // let _line = line;
// // let _col = col;
// if(_token) {
// token = _token;
// }
// if(token.trim() !== '') {
// if(keywords.has(token))
// tokens.push(Tokens.Keyword(token));
// else if (isStringDelim(token[0]))
// tokens.push(Tokens.String(token));
// else if (token === 'NEWLINE')
// tokens.push(Tokens.Newline())
// else
// tokens.push(Tokens.Identifier(token));
// resetToken();
// }
// let _line = line;
// let _col = col;
const isWhitespace = (char) => [' ', '\n', '\t', '\r'].includes(char);
const isNewline = (char) => char === '\n';
const isSingleCharToken = (char) => ['(', ')', '='].includes(char);
@@ -87,11 +83,3 @@ function tokenize(string) {
return tokens;
}
module.exports = tokenize;
const tokens = tokenize(require('fs').readFileSync('disco.disco').toString('utf-8'));
require('fs').writeFileSync('bytecode.json', JSON.stringify(tokens, null, 2))
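
tokenize now yields token-class instances rather than raw strings; assuming the newline handling that produced the NEWLINE entries in bytecode.json above, a call looks roughly like this (a sketch, not part of the commit):

tokenize('log("Hi")\n');
// -> [ $Identifier('log'), $KeywordLParen, $String('Hi'), $KeywordRParen, $Newline ]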

tsconfig.json

@@ -4,7 +4,8 @@
"target": "ESNext",
"strictFunctionTypes": true,
"sourceMap": true,
"outDir": "out"
"outDir": "out",
"declaration": true
},
"include": [
"src/**/*.ts"

yarn.lock

@@ -2,6 +2,11 @@
# yarn lockfile v1
"@types/node@^17.0.21":
version "17.0.21"
resolved "https://registry.yarnpkg.com/@types/node/-/node-17.0.21.tgz#864b987c0c68d07b4345845c3e63b75edd143644"
integrity sha512-DBZCJbhII3r90XbQxI8Y9IjjiiOGlZ0Hr32omXIZvwwZ7p4DMMXGrKXVyPfuoBOri9XNtL0UK69jYIBIsRX3QQ==
ansi-styles@^4.1.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"