listen, the parser works
parent
3c99623b6a
commit
a693c9673e
|
|
@ -1,3 +1,4 @@
|
|||
disco
|
||||
out
|
||||
*.o
|
||||
node_modules
|
||||
16
ast.js
16
ast.js
|
|
@ -1,10 +1,10 @@
|
|||
module.exports = {
|
||||
Body(statements) { return { type: 'body', value: statements } },
|
||||
Link(identifier) { return { type: 'link', value: identifier } },
|
||||
Invocation(identifier, ...args) { return { type: 'invo', value: identifier, args } },
|
||||
Const(name, value) { return { type: 'const', value, name } },
|
||||
Int(n) { return { type: 'int', value: n } },
|
||||
String(s) { return { type: 'string', value: s } },
|
||||
Variable(name, value) { return { type: 'var', value, name } },
|
||||
VariableReference(name) { return { type: 'ref', value: name } }
|
||||
Body(statements) { return { type: 'body', value: statements } },
|
||||
Link(identifier) { return { type: 'link', value: identifier } },
|
||||
Invocation(identifier, ...args) { return { type: 'invo', value: identifier, args } },
|
||||
Const(name, value) { return { type: 'const', value, name } },
|
||||
Int(n) { return { type: 'int', value: n } },
|
||||
String(s) { return { type: 'string', value: s } },
|
||||
Variable(name, value) { return { type: 'var', value, name } },
|
||||
VariableReference(name) { return { type: 'ref', value: name } },
|
||||
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
[
|
||||
{
|
||||
"type": "KEYWORD",
|
||||
"value": "LINK"
|
||||
},
|
||||
{
|
||||
"type": "IDENTIFIER",
|
||||
"value": "log1"
|
||||
},
|
||||
{
|
||||
"type": "NEWLINE"
|
||||
},
|
||||
{
|
||||
"type": "KEYWORD",
|
||||
"value": "LINK"
|
||||
},
|
||||
{
|
||||
"type": "IDENTIFIER",
|
||||
"value": "log2"
|
||||
}
|
||||
]
|
||||
16
disco.disco
16
disco.disco
|
|
@ -1,4 +1,18 @@
|
|||
|
||||
link log1
|
||||
link log2
|
||||
|
||||
|
||||
|
||||
link log
|
||||
log("Hello")
|
||||
const test = "Hello"
|
||||
const test2 ="Hello2"
|
||||
const test3 = 'Hello'
|
||||
const a="5"
|
||||
log(test)
|
||||
log("World")
|
||||
log("Hello\n \"Wor(l)d\"\n\\o/")
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,7 @@
|
|||
module.exports = {
|
||||
asmName: '_log_time',
|
||||
asm: `\
|
||||
mov rax, 30
|
||||
|
||||
ret`
|
||||
}
|
||||
|
|
@ -7,5 +7,9 @@
|
|||
"build": "nasm -f elf64 disco.asm -o disco.o && ld disco.o -o disco",
|
||||
"start": "./disco",
|
||||
"dev": "yarn build && yarn start"
|
||||
},
|
||||
"dependencies": {
|
||||
"chalk": "3",
|
||||
"typescript": "^4.6.2"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,264 @@
|
|||
import * as chalk from 'chalk';
|
||||
|
||||
/**
 * Folds three colour components into a single palette index:
 * `r * 36 + g * 6 + b + 16`.
 *
 * NOTE(review): the weights suggest each component is expected to be in 0..5;
 * nothing here enforces that — confirm against callers.
 */
const rgb2ansi = (r: number, g: number, b: number) => {
  const paletteOffset = 16;
  return r * 36 + g * 6 + b + paletteOffset;
};
|
||||
/**
 * Returns the chalk styler for the palette index computed by `rgb2ansi`.
 * When `g` or `b` is omitted it defaults to `r`, so `ansi(2)` is a grey tone.
 */
const ansi = (r: number, g = r, b = r) => {
  const paletteIndex = rgb2ansi(r, g, b);
  return chalk.ansi256(paletteIndex);
};
|
||||
|
||||
/**
 * Base class for every grammar symbol instance.
 *
 * Instances carry a position (`l`, `c`); the class itself (the static side)
 * is what productions refer to, and it knows how to print its own name.
 * NOTE(review): `l`/`c` are presumably line and column — confirm.
 */
abstract class Token {
  l: number;
  c: number;
  /** Set by the `Terminal` / `NonTerminal` subclasses. */
  static terminal: boolean;

  constructor(l: number, c: number) {
    this.l = l;
    this.c = c;
  }

  /**
   * Coloured rendering of the class name. The first character of the class
   * name is dropped and a coloured `$` is printed in its place; terminals and
   * non-terminals use different colours.
   */
  static toString() {
    const label = `${this.name.substring(1)}`;
    return this.terminal
      ? ansi(0, 3, 2)('$') + ansi(0, 5, 3)(label)
      : ansi(0, 2, 3)('$') + ansi(0, 3, 5)(label);
  }

  /** Rendering of the token's "value" part; by default the class rendering. */
  valueToString() {
    return this.constructor.toString();
  }

  /** Renders the token as `(value:l:c)` with coloured punctuation. */
  toString() {
    const punctuation = ansi(2);
    const position = ansi(3);
    return [
      punctuation('('),
      this.valueToString(),
      punctuation(':'),
      position(this.l),
      punctuation(':'),
      position(this.c),
      punctuation(')'),
    ].join('');
  }
}
|
||||
/** Marker base class: symbols that are expanded through productions. */
class NonTerminal extends Token {
  static terminal: false = false;
}
|
||||
/** Marker base class: symbols that are matched directly against input tokens. */
class Terminal extends Token {
  static terminal: true = true;
}
|
||||
|
||||
/** Type guard: narrows a token class to a terminal by reading its static `terminal` flag. */
function isTerminal(tokenClass: TokenClass): tokenClass is TerminalTokenClass {
  const { terminal } = tokenClass;
  return terminal;
}
|
||||
|
||||
/** Type guard: narrows a token class to a non-terminal (its static `terminal` flag is falsy). */
function isNonTerminal(tokenClass: TokenClass): tokenClass is NonTerminalTokenClass {
  return tokenClass.terminal ? false : true;
}
|
||||
|
||||
/** Constructor type of a terminal symbol: builds a `Terminal` and is tagged `terminal: true`. */
type TerminalTokenClass = {
  new (...args: any[]): Terminal;
  terminal: true;
};

/** Constructor type of a non-terminal symbol: builds a `NonTerminal` and is tagged `terminal: false`. */
type NonTerminalTokenClass = {
  new (...args: any[]): NonTerminal;
  terminal: false;
};

/** Any grammar symbol class; the `terminal` tag discriminates the union. */
type TokenClass =
  | TerminalTokenClass
  | NonTerminalTokenClass;
|
||||
|
||||
// class Identifier extends Token { constructor(l, c, value) { super(l, c); this.value = value; } }
|
||||
/** Terminal for a numeric literal; keeps the literal's text in `value`. */
class $Number extends Terminal {
  value: string;

  constructor(l: number, c: number, value: string) {
    super(l, c);
    this.value = value;
  }
}
|
||||
// Terminal symbols without a payload.
class $Plus extends Terminal {}
class $Times extends Terminal {}

// Non-terminal symbols; their expansions are given by the grammar's productions.
class $Term extends NonTerminal {}
// NOTE(review): $Poop is not referenced by any production visible in this file.
class $Poop extends NonTerminal {}
class $Addition extends NonTerminal {}
|
||||
|
||||
/** Returns the class (constructor) a token instance was created from. */
function getTokenClassFromToken(token: Token): TokenClass {
  const tokenClass = token.constructor;
  return tokenClass as TokenClass;
}
|
||||
|
||||
/**
 * An append-only history of states.
 *
 * A fresh state is produced by the factory passed to the constructor; the
 * constructor creates the first one, so `current` is always defined.
 */
class TimeMachine<T> {
  states: T[] = [];
  stateConstructor: () => T;

  /** @param fn Factory invoked once per `newState()` call (and once on construction). */
  constructor(fn: () => T) {
    this.stateConstructor = fn;
    this.newState();
  }

  /** Appends a freshly constructed state; it becomes `current`. */
  newState() {
    this.states.push(this.stateConstructor());
  }

  /** The most recently created state. */
  get current() {
    return this.states[this.states.length - 1];
  }

  /** The state created just before `current`. Logs an assertion failure if there is none. */
  get previousState() {
    console.assert(this.states.length >= 2, 'No previous state to get.');
    return this.stateByIndex(-1);
  }

  /** Absolute index of `current`. */
  get currentIndex() {
    return this.states.length - 1;
  }

  /**
   * Looks a state up by index.
   *
   * @param n A non-negative value is an absolute index. A negative value is
   *          relative to the current state (`-1` is the previous state).
   * @returns The state, or `undefined` when the index is out of range; in that
   *          case an assertion failure is logged for either sign of `n`.
   */
  stateByIndex(n: number) {
    const index = n >= 0 ? n : this.currentIndex + n;
    console.assert(
      index >= 0 && index < this.states.length,
      `State index ${n} does not exist`,
    );
    return this.states[index];
  }
}
|
||||
|
||||
/** One grammar rule: `left` may be rewritten as the sequence `right`. */
interface Production {
  /** The symbol being defined. */
  left: TokenClass;
  /** The symbols, in order, that make up one expansion of `left`. */
  right: Array<TokenClass>;
  // resolver: (...args: any[]) => any;
}
|
||||
|
||||
/**
 * A set of productions with a designated start symbol, plus an Earley-style
 * recognizer (`solveFor`) that also prints every state set it builds.
 */
class Grammar {
  private productions: Production[];
  private startingSymbol: NonTerminalTokenClass;

  constructor(productions: Production[], startingSymbol: NonTerminalTokenClass) {
    this.productions = productions;
    this.startingSymbol = startingSymbol;
  }

  /**
   * Runs the recognizer over `tokens`, logging each state set.
   *
   * @param tokens Input token instances, in order.
   * @returns `true` when, after the last token, some production of the start
   *          symbol that began at state 0 is complete; `false` otherwise,
   *          including when a token cannot extend any partial match (parsing
   *          stops at that token).
   */
  solveFor(tokens: Token[]): boolean {
    const state = new TimeMachine<SingleEarleyState>(() => new SingleEarleyState());

    // Items already admitted to the current state set (including ones whose
    // expansion is still in progress). Without this guard a left-recursive
    // production (A => A ...) would make `expand` predict itself forever.
    let seen = new Map<Production, Set<string>>();
    const admit = (item: PartialMatch): boolean => {
      const key = `${item.progress}:${item.source}`;
      let keys = seen.get(item.production);
      if (keys === undefined) {
        keys = new Set<string>();
        seen.set(item.production, keys);
      }
      if (keys.has(key)) return false;
      keys.add(key);
      return true;
    };

    // Adds to the current state set everything that follows from `partial`:
    // completions when it is finished, predictions when a non-terminal is next.
    // Derived items are pushed after their own expansion.
    const expand = (partial: PartialMatch) => {
      if (partial.complete) {
        // Completion: advance every item of the originating state set that was
        // waiting for the symbol this production defines.
        const pastPartials = state.stateByIndex(partial.source).partialMatches;
        for (const pastPartial of pastPartials) {
          if (pastPartial.nextTokenClass !== partial.production.left) continue;
          const advanced = pastPartial.getAdvancedCopy();
          if (!admit(advanced)) continue;
          expand(advanced);
          state.current.partialMatches.push(advanced);
        }
        return;
      }

      const nextTokenClass = partial.nextTokenClass;
      // A terminal can only be consumed by scanning the next input token.
      if (isTerminal(nextTokenClass)) return;

      // Prediction: start every production of the awaited non-terminal here.
      for (const production of getProductionsForTokenClass(this.productions, nextTokenClass)) {
        const predicted = new PartialMatch(production, 0, state.currentIndex);
        if (!admit(predicted)) continue;
        expand(predicted);
        state.current.partialMatches.push(predicted);
      }
    };

    // Puts the seed items into the current state set, then expands each seed.
    const seedAndExpand = (seeds: PartialMatch[]) => {
      const fresh = seeds.filter((seed) => admit(seed));
      state.current.partialMatches.push(...fresh);
      fresh.forEach((seed) => expand(seed));
    };

    seedAndExpand(
      getProductionsForTokenClass(this.productions, this.startingSymbol)
        .map((production) => new PartialMatch(production, 0, state.currentIndex)),
    );

    console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + this.startingSymbol.toString());
    console.log(state.current.toString(), '\n\n')

    for (const token of tokens) {
      state.newState();
      seen = new Map<Production, Set<string>>();
      console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + token.toString());

      // Scanning: carry over every unfinished item that expects this token.
      const scanned = state.previousState.partialMatches
        .filter((partialMatch) => !partialMatch.complete && token instanceof partialMatch.nextTokenClass)
        .map((partialMatch) => partialMatch.getAdvancedCopy());

      if (scanned.length === 0) {
        // Nothing can consume this token, so every later state set would be empty too.
        console.assert(false, ansi(4, 1, 1)('unexpected token ' + token.toString()));
        return false;
      }

      seedAndExpand(scanned);
      // Kept as a safety net; `admit` already prevents duplicates.
      state.current.deduplicate()

      console.log(state.current.toString(), '\n\n')
    }

    return state.current.partialMatches.some((partialMatch) =>
      partialMatch.complete
      && partialMatch.source === 0
      && partialMatch.production.left === this.startingSymbol);
  }
}
|
||||
|
||||
/** Returns, in their original order, the productions whose left side is exactly `tokenClass`. */
function getProductionsForTokenClass(productions: Production[], tokenClass: TokenClass): Production[] {
  const matching: Production[] = [];
  for (const candidate of productions) {
    if (candidate.left === tokenClass) matching.push(candidate);
  }
  return matching;
}
|
||||
|
||||
/**
 * Collects the terminals that can appear first when `tokenClass` is expanded,
 * following only the first symbol of each production.
 *
 * Each non-terminal is visited at most once, so left-recursive grammars
 * terminate. Productions with an empty right side are skipped.
 * NOTE(review): a nullable first symbol is not looked past, so for grammars
 * with empty productions the result may be incomplete — confirm whether that
 * matters to callers.
 *
 * @returns The distinct terminals in first-encounter order; `[tokenClass]`
 *          when `tokenClass` is itself a terminal.
 */
function getFirstTerminalsForTokenClass(productions: Production[], tokenClass: TokenClass): TerminalTokenClass[] {
  const firsts = new Set<TerminalTokenClass>();
  const visited = new Set<TokenClass>();

  const visit = (current: TokenClass): void => {
    if (isTerminal(current)) {
      firsts.add(current);
      return;
    }
    if (visited.has(current)) return;
    visited.add(current);
    for (const production of getProductionsForTokenClass(productions, current)) {
      const head = production.right[0];
      if (head !== undefined) visit(head);
    }
  };

  visit(tokenClass);
  return [...firsts];
}
|
||||
|
||||
/**
 * A production together with how many of its right-side symbols have been
 * matched (`progress`) and the index of the state set it was started in
 * (`source`). Instances are immutable; advancing yields a new instance.
 */
class PartialMatch {
  readonly production: Production;
  readonly progress: number = 0;
  readonly source: number = 0;

  /**
   * @param production The rule being matched.
   * @param completion Number of right-side symbols already matched.
   * @param source     Index of the state set in which the match began.
   */
  constructor(production: Production, completion: number, source: number) {
    this.production = production;
    this.progress = completion;
    this.source = source;
  }

  /** True once every right-side symbol has been matched. */
  get complete() {
    return this.progress === this.production.right.length;
  }

  /** The symbol expected next; `undefined` at runtime when the match is complete. */
  get nextTokenClass(): TokenClass {
    const { right } = this.production;
    return right[this.progress];
  }

  // getNextTerminal(productions: Production[]) {
  //   if()
  //   return getFirstTerminalsForTokenClass
  // }

  /** Returns a copy with one more right-side symbol matched. */
  getAdvancedCopy() {
    const advancedProgress = this.progress + 1;
    return new PartialMatch(this.production, advancedProgress, this.source);
  }

  /** Renders `left => right-side (source)` with a bullet marking the progress position. */
  toString() {
    const dot = () => ansi(5, 1, 2)('\u2022');
    const parts: string[] = [];
    this.production.right.forEach((tokenClass, position) => {
      if (position === this.progress) parts.push(dot());
      parts.push(tokenClass.toString());
    });
    // A finished match shows the bullet after the last symbol.
    if (this.complete) parts.push(dot());

    const arrow = ansi(2, 2, 2)(' => ');
    const origin = ansi(2, 2, 2)(' (' + this.source + ')');
    return this.production.left.toString() + arrow + parts.join(' ') + origin;
  }
}
|
||||
|
||||
/**
 * Returns a new array containing the first occurrence of each item of `arr`,
 * where "same item" is decided by `fn`.
 *
 * @param arr Items to filter; not modified.
 * @param fn  Equality predicate, called as `fn(alreadyKept, candidate)`.
 *            Evaluation stops at the first kept item that matches.
 * @returns The distinct items in their original relative order.
 */
function deduplicate<T>(arr: T[], fn: (a: T, b: T) => boolean) {
  const unique: T[] = [];
  for (const item of arr) {
    const alreadyKept = unique.some((kept) => fn(kept, item));
    if (!alreadyKept) unique.push(item);
  }
  return unique;
}
|
||||
|
||||
/** One state set of the recognizer: the partial matches alive at one input position. */
class SingleEarleyState {
  partialMatches: PartialMatch[] = [];

  constructor() {}

  /** Drops partial matches that repeat an earlier one (same production, progress and source). */
  deduplicate() {
    const sameItem = (a: PartialMatch, b: PartialMatch) =>
      a.production === b.production
      && a.progress === b.progress
      && a.source === b.source;
    this.partialMatches = deduplicate(this.partialMatches, sameItem);
  }

  /** One rendered partial match per line. */
  toString() {
    const lines: string[] = [];
    for (const partialMatch of this.partialMatches) {
      lines.push(partialMatch.toString());
    }
    return lines.join('\n');
  }
}
|
||||
|
||||
// Hand-built token stream: Number Plus Number Times Number Plus Number.
// NOTE(review): every number is positioned at (1, 1) and every operator at
// (1, 3); these look like placeholder positions — confirm.
const tokens: Token[] = [
  new $Number(1, 1, '45'),
  new $Plus(1, 3),
  new $Number(1, 1, '45'),
  new $Times(1, 3),
  new $Number(1, 1, '45'),
  new $Plus(1, 3),
  new $Number(1, 1, '45'),
];

// Demo grammar:
//   $Term     => $Addition $Times $Addition
//   $Addition => $Number $Plus $Number
const ps: Production[] = [
  { left: $Term, right: [$Addition, $Times, $Addition] },
  { left: $Addition, right: [$Number, $Plus, $Number] },
];

const grammar = new Grammar(ps, $Term);

// Runs the recognizer (which prints its state sets) and logs whatever it returns.
console.log(grammar.solveFor(tokens));
|
||||
|
||||
// console.log(getFirstTerminalsForTokenClass(ps, $Term))
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
const chalk = require('chalk');
|
||||
|
||||
// const keywords = new Map([
|
||||
// ['=', 'EQUALS'],
|
||||
// ['(', 'LPAREN'],
|
||||
// [')', 'RPAREN'],
|
||||
// ['link', 'LINK'],
|
||||
// ['const', 'CONST'],
|
||||
// ]);
|
||||
|
||||
// const Tokens = {
|
||||
// Keyword(str) { return { type: 'KEYWORD', value: keywords.get(str) } },
|
||||
// Newline() { return { type: 'NEWLINE' } },
|
||||
// Identifier(str) { return { type: 'IDENTIFIER', value: str } },
|
||||
// String(str) { return { type: 'STRING', value: str } }
|
||||
// }
|
||||
|
||||
/**
 * Splits source text into a flat list of token strings.
 *
 * - Whitespace separates tokens and is never emitted.
 * - `(`, `)` and `=` are always tokens of their own.
 * - A string literal starts at `'` or `"` and ends at the next unescaped
 *   occurrence of the same quote character; the quotes stay in the token.
 *   Inside a literal, `\n`, `\t` and `\r` are converted to the control
 *   character and any other escaped character is taken literally.
 * - Each line break emits the marker token `'NEWLINE'`, except before the
 *   first real token. A line break always ends the current token, even
 *   inside a string literal.
 * - Empty tokens are never emitted, and a token still pending at the end of
 *   the input is emitted.
 *
 * @param string Source text.
 * @returns Array of token strings.
 */
function tokenize(string) {
  const tokens = [];
  let token = '';
  // Quote character that opened the string literal being read; '' when outside one.
  let openQuote = '';
  let escaping = false;

  // Emits the pending token, if any, and starts a new one.
  const flush = () => {
    if (token !== '') tokens.push(token);
    token = '';
  };

  const isWhitespace = (char) => [' ', '\n', '\t', '\r'].includes(char);
  const isNewline = (char) => char === '\n';
  const isSingleCharToken = (char) => ['(', ')', '='].includes(char);
  const isStringDelim = (char) => ["'", '"'].includes(char);
  const isEscapeChar = (char) => char === '\\';
  const resolveEscape = (char) => (char === 'n' ? '\n'
    : char === 't' ? '\t'
    : char === 'r' ? '\r' : char);

  for (const char of string) {
    if (isNewline(char)) {
      flush();
      // Only mark line breaks once at least one token has been produced.
      if (tokens.length > 0) tokens.push('NEWLINE');
    } else if (escaping) {
      token += resolveEscape(char);
      escaping = false;
    } else if (openQuote !== '') {
      if (char === openQuote) {
        token += char;
        openQuote = '';
      } else if (isEscapeChar(char)) {
        escaping = true;
      } else {
        // Includes the other quote character, which does not end this literal.
        token += char;
      }
    } else if (isStringDelim(char)) {
      token += char;
      openQuote = char;
    } else if (isSingleCharToken(char)) {
      flush();
      tokens.push(char);
    } else if (isWhitespace(char)) {
      flush();
    } else {
      token += char;
    }
  }

  // Input need not end with whitespace; do not lose the last token.
  flush();

  return tokens;
}
|
||||
|
||||
module.exports = tokenize;

// Runs whenever this module is loaded: tokenizes `disco.disco` from the
// current working directory and writes the token list, pretty-printed with a
// two-space indent, to `bytecode.json`.
const tokens = tokenize(
  require('fs').readFileSync('disco.disco').toString('utf-8')
);

require('fs').writeFileSync(
  'bytecode.json',
  JSON.stringify(tokens, null, 2)
)
|
||||
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"module": "commonjs",
|
||||
"target": "ESNext",
|
||||
"strictFunctionTypes": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "out"
|
||||
},
|
||||
"include": [
|
||||
"src/**/*.ts"
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
const chalk = require('chalk');
|
||||
|
||||
/**
 * Writes a one-line, coloured rendering of `tokens` to stdout.
 *
 * A token whose `type` is 'NEWLINE' is shown as an ` LF ` badge. Any other
 * token is shown as `(type:value)`, or `(type)` when it has no `value`
 * property, with line feeds, carriage returns and tabs in the text replaced
 * by inverse-video `LF`, `CR` and `TB` markers.
 */
module.exports.printTokens = function printTokens(tokens) {
  // Makes control characters visible.
  const showControlChars = (text) => text
    .replaceAll('\n', chalk.inverse('LF'))
    .replaceAll('\r', chalk.inverse('CR'))
    .replaceAll('\t', chalk.inverse('TB'));

  for (const token of tokens) {
    if (token.type === 'NEWLINE') {
      process.stdout.write(chalk.bgRedBright.black(' LF ') + ' ');
    } else {
      const label = 'value' in token ? token.type + ':' + token.value : token.type;
      const correctedToken = showControlChars(label);
      process.stdout.write(`${chalk.grey('(')}${chalk.cyanBright(correctedToken)}${chalk.grey(')')} `);
    }
  }
  // Terminates the line.
  console.log();
}
|
||||
|
||||
/**
 * Logs one line per production in the form `name -> symbol symbol ...`.
 *
 * `productions` is iterated with `for...in`; each property value is iterated
 * as a list of entries whose first element is the array of right-side
 * symbols. Function symbols are printed by their `name` in red, everything
 * else in green.
 */
module.exports.printProductions = function printProductions(productions) {
  const renderSymbol = (symbol) =>
    typeof symbol === 'function' ? chalk.red(symbol.name) : chalk.green(symbol);

  for (const resolvedName in productions) {
    for (const [production] of productions[resolvedName]) {
      const rightSide = production.map((symbol) => renderSymbol(symbol)).join(' ');
      console.log(chalk.green.inverse(resolvedName), chalk.grey('->'), rightSide);
    }
  }
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
|
||||
# yarn lockfile v1
|
||||
|
||||
|
||||
ansi-styles@^4.1.0:
|
||||
version "4.3.0"
|
||||
resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
|
||||
integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
|
||||
dependencies:
|
||||
color-convert "^2.0.1"
|
||||
|
||||
chalk@3:
|
||||
version "3.0.0"
|
||||
resolved "https://registry.yarnpkg.com/chalk/-/chalk-3.0.0.tgz#3f73c2bf526591f574cc492c51e2456349f844e4"
|
||||
integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
|
||||
dependencies:
|
||||
ansi-styles "^4.1.0"
|
||||
supports-color "^7.1.0"
|
||||
|
||||
color-convert@^2.0.1:
|
||||
version "2.0.1"
|
||||
resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3"
|
||||
integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==
|
||||
dependencies:
|
||||
color-name "~1.1.4"
|
||||
|
||||
color-name@~1.1.4:
|
||||
version "1.1.4"
|
||||
resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
|
||||
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
|
||||
|
||||
has-flag@^4.0.0:
|
||||
version "4.0.0"
|
||||
resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
|
||||
integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==
|
||||
|
||||
supports-color@^7.1.0:
|
||||
version "7.2.0"
|
||||
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
|
||||
integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==
|
||||
dependencies:
|
||||
has-flag "^4.0.0"
|
||||
|
||||
typescript@^4.6.2:
|
||||
version "4.6.2"
|
||||
resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.6.2.tgz#fe12d2727b708f4eef40f51598b3398baa9611d4"
|
||||
integrity sha512-HM/hFigTBHZhLXshn9sN37H085+hQGeJHJ/X7LpBWLID/fbc2acUMfU+lGD98X81sKP+pFa9f0DZmCwB9GnbAg==
|
||||
Loading…
Reference in New Issue