listen, the parser works

branch: canary
Neon, 2022-03-12 07:49:35 -05:00
parent 3c99623b6a
commit a693c9673e
11 changed files with 503 additions and 11 deletions

.gitignore

@@ -1,3 +1,4 @@
 disco
 out
 *.o
+node_modules

ast.js

@@ -1,10 +1,10 @@
 module.exports = {
   Body(statements) { return { type: 'body', value: statements } },
   Link(identifier) { return { type: 'link', value: identifier } },
   Invocation(identifier, ...args) { return { type: 'invo', value: identifier, args } },
   Const(name, value) { return { type: 'const', value, name } },
   Int(n) { return { type: 'int', value: n } },
   String(s) { return { type: 'string', value: s } },
   Variable(name, value) { return { type: 'var', value, name } },
-  VariableReference(name) { return { type: 'ref', value: name } }
+  VariableReference(name) { return { type: 'ref', value: name } },
 }
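
For reference, a sketch of how these constructors compose; the parser that actually calls them is not part of this commit, so the usage below is hypothetical:

const ast = require('./ast');

// AST for the two disco lines:
//   const test = "Hello"
//   log(test)
const program = ast.Body([
  ast.Const('test', ast.String('Hello')),
  ast.Invocation('log', ast.VariableReference('test')),
]);
// => { type: 'body', value: [{ type: 'const', ... }, { type: 'invo', ... }] }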

bytecode.json 100644

@@ -0,0 +1,21 @@
[
  {
    "type": "KEYWORD",
    "value": "LINK"
  },
  {
    "type": "IDENTIFIER",
    "value": "log1"
  },
  {
    "type": "NEWLINE"
  },
  {
    "type": "KEYWORD",
    "value": "LINK"
  },
  {
    "type": "IDENTIFIER",
    "value": "log2"
  }
]
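
These { type, value } objects match the commented-out Tokens constructors in tokenizer.js below, whereas the tokenize in this commit serializes plain strings, so this file reads like output from an earlier tokenizer revision. Assuming the object shape shown here, it round-trips through utils.printTokens:

const { printTokens } = require('./utils');
// Load the serialized token stream and pretty-print it; printTokens expects
// objects with a `type` and optional `value`, exactly this file's shape.
const tokens = JSON.parse(require('fs').readFileSync('bytecode.json', 'utf-8'));
printTokens(tokens); // (KEYWORD:LINK) (IDENTIFIER:log1)  LF  (KEYWORD:LINK) (IDENTIFIER:log2)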

disco.disco

@@ -1,4 +1,18 @@
-link log1
-link log2
+link log
-log("Hello")
-log("World")
+const test = "Hello"
+const test2 ="Hello2"
+const test3 = 'Hello'
+const a="5"
+log(test)
+log("World")
+log("Hello\n \"Wor(l)d\"\n\\o/")

@@ -0,0 +1,7 @@
module.exports = {
  asmName: '_log_time',
  asm: `\
mov rax, 30
ret`
}
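
Presumably the code generator splices `asm` in under the `asmName` label so compiled disco code can call it; that consumer is not in this diff, so the emitter below is only a sketch of the assumption (the require path is made up):

// Hypothetical emitter for a native module of this shape.
const native = require('./log_time'); // assumed path, not confirmed by the diff
const emitted = `${native.asmName}:\n${native.asm}\n`;
// _log_time:
// mov rax, 30
// ret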

package.json

@@ -7,5 +7,9 @@
     "build": "nasm -f elf64 disco.asm -o disco.o && ld disco.o -o disco",
     "start": "./disco",
     "dev": "yarn build && yarn start"
-  }
+  },
+  "dependencies": {
+    "chalk": "3",
+    "typescript": "^4.6.2"
+  }
 }

src/createAST.ts 100644

@@ -0,0 +1,264 @@
import * as chalk from 'chalk';

// Map an r/g/b triple (each 0-5) onto the 6x6x6 ansi-256 color cube.
const rgb2ansi = (r: number, g: number, b: number) => r * 36 + g * 6 + b + 16
const ansi = (r: number, g = r, b = r) => chalk.ansi256(rgb2ansi(r, g, b));

abstract class Token {
  l: number;
  c: number;
  static terminal: boolean;
  constructor(l: number, c: number) {
    this.l = l;
    this.c = c;
  }
  // The static toString renders the token *class* name (minus the `$` prefix),
  // one color scheme for terminals and another for non-terminals.
  static toString() {
    if(this.terminal) {
      return ansi(0, 3, 2)('$') + ansi(0, 5, 3)(`${this.name.substring(1)}`)
    } else {
      return ansi(0, 2, 3)('$') + ansi(0, 3, 5)(`${this.name.substring(1)}`)
    }
  }
  valueToString() {
    return this.constructor.toString();
  }
  // The instance toString adds the line:column position.
  toString() {
    return ansi(2)('(') +
      this.valueToString() +
      ansi(2)(':') +
      ansi(3)(this.l) +
      ansi(2)(':') +
      ansi(3)(this.c) +
      ansi(2)(')')
  }
}
class NonTerminal extends Token { static terminal: false = false }
class Terminal extends Token { static terminal: true = true }

function isTerminal(tokenClass: TokenClass): tokenClass is TerminalTokenClass {
  return tokenClass.terminal;
}
function isNonTerminal(tokenClass: TokenClass): tokenClass is NonTerminalTokenClass {
  return !tokenClass.terminal;
}

type TerminalTokenClass = { new(...args: any[]) : Terminal, terminal: true }
type NonTerminalTokenClass = { new(...args: any[]) : NonTerminal, terminal: false }
type TokenClass = TerminalTokenClass | NonTerminalTokenClass;

// class Identifier extends Token { constructor(l, c, value) { super(l, c); this.value = value; } }
class $Number extends Terminal { value: string; constructor(l: number, c: number, value: string) { super(l, c); this.value = value; } }
class $Plus extends Terminal { }
class $Times extends Terminal { }
class $Term extends NonTerminal { }
class $Poop extends NonTerminal { }
class $Addition extends NonTerminal { }

function getTokenClassFromToken(token: Token): TokenClass {
  return token.constructor as TokenClass;
}
// A growable list of states with "latest wins" accessors; the recognizer
// pushes one state here per input position.
class TimeMachine<T> {
  states: T[] = [];
  stateConstructor: () => T;
  constructor(fn: () => T) {
    this.stateConstructor = fn;
    this.newState();
  }
  newState() {
    this.states.push(this.stateConstructor());
  }
  get current() {
    return this.states[this.states.length - 1];
  }
  get previousState() {
    console.assert(this.states.length >= 2, 'No previous state to get.');
    return this.stateByIndex(-1);
  }
  get currentIndex() {
    return this.states.length - 1;
  }
  // Non-negative n is an absolute index; negative n counts back from the
  // state before the current one.
  stateByIndex(n: number) {
    if(n >= 0) {
      console.assert(n < this.states.length, `State index ${n} does not exist`);
      return this.states[n];
    } else {
      return this.states[this.states.length - 1 + n];
    }
  }
}
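
A minimal sketch of the TimeMachine contract, independent of the parser:

const history = new TimeMachine<string[]>(() => []);
history.current.push('a');   // state 0: ['a']
history.newState();          // start state 1: []
history.current.push('b');   // state 1: ['b']
history.currentIndex;        // 1
history.previousState;       // ['a'] (asserts if there is no earlier state)
history.stateByIndex(0);     // ['a'] -- absolute index
history.stateByIndex(-1);    // ['a'] -- one state back from current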
interface Production {
  left: TokenClass;
  right: TokenClass[];
  // resolver: (...args: any[]) => any;
}

class Grammar {
  private productions: Production[];
  private startingSymbol: NonTerminalTokenClass;
  constructor(productions: Production[], startingSymbol: NonTerminalTokenClass) {
    this.productions = productions;
    this.startingSymbol = startingSymbol;
  }
  // An Earley-style recognizer: one SingleEarleyState per input position.
  solveFor(tokens: Token[]) {
    const state = new TimeMachine<SingleEarleyState>(() => new SingleEarleyState());
    const possibleStartingProductions = getProductionsForTokenClass(this.productions, this.startingSymbol)
    for(const production of possibleStartingProductions) {
      state.current.partialMatches.push(new PartialMatch(production, 0, state.currentIndex));
    }
    // Prediction and completion: expand all non-terminals in the current state.
    const expand = (partial: PartialMatch) => {
      if(partial.complete) {
        // Completion: advance every older item that was waiting on this
        // item's left-hand side.
        const pastPartials = state.stateByIndex(partial.source).partialMatches;
        for(const pastPartial of pastPartials) {
          if(pastPartial.nextTokenClass === partial.production.left) {
            const newPartial = pastPartial.getAdvancedCopy();
            expand(newPartial);
            state.current.partialMatches.push(newPartial);
          }
        }
        return;
      }
      const nextTokenClass = partial.nextTokenClass;
      if(isTerminal(nextTokenClass)) return;
      // Prediction: the item expects a non-terminal next, so seed fresh
      // items for each of its productions.
      const possibleProductions = getProductionsForTokenClass(this.productions, nextTokenClass);
      for(const production of possibleProductions) {
        const partialMatch = new PartialMatch(production, 0, state.currentIndex);
        expand(partialMatch);
        state.current.partialMatches.push(partialMatch)
      }
    }
    state.current.partialMatches.forEach(expand);
    console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + this.startingSymbol.toString());
    console.log(state.current.toString(), '\n\n')
    for(const token of tokens) {
      state.newState();
      console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + token.toString());
      // Scanning: carry forward every item whose expected terminal matches
      // the current token.
      for(const partialMatch of state.previousState.partialMatches) {
        if(partialMatch.complete) continue;
        if(token instanceof partialMatch.nextTokenClass) {
          state.current.partialMatches.push(partialMatch.getAdvancedCopy());
        }
      }
      console.assert(state.current.partialMatches.length !== 0, ansi(4, 1, 1)('unexpected token ' + token.toString()))
      state.current.partialMatches.forEach(expand);
      state.current.deduplicate()
      console.log(state.current.toString(), '\n\n')
    }
  }
}
function getProductionsForTokenClass(productions: Production[], tokenClass: TokenClass): Production[] {
  return productions.filter((p: Production) => {
    return p.left === tokenClass
  })
}

// Walk leftmost symbols until terminals are reached; effectively a FIRST
// set, without epsilon handling.
function getFirstTerminalsForTokenClass(productions: Production[], tokenClass: TokenClass): TerminalTokenClass[] {
  if(isTerminal(tokenClass)) return [tokenClass];
  const tokenClasses = getProductionsForTokenClass(productions, tokenClass).map((p: Production) => {
    return getFirstTerminalsForTokenClass(productions, p.right[0])
  }).flat();
  const tokenClassesDeduped = [... new Set(tokenClasses)];
  return tokenClassesDeduped;
}
// One Earley item: a production, a dot position (`progress`), and the index
// of the state set where the match started (`source`).
class PartialMatch {
  readonly production: Production;
  readonly progress: number = 0;
  readonly source: number = 0;
  constructor(production: Production, completion: number, source: number) {
    this.production = production;
    this.progress = completion;
    this.source = source;
  }
  get complete() {
    return this.production.right.length === this.progress;
  }
  get nextTokenClass(): TokenClass {
    return this.production.right[this.progress];
  }
  // getNextTerminal(productions: Production[]) {
  //   if()
  //   return getFirstTerminalsForTokenClass
  // }
  getAdvancedCopy() {
    return new PartialMatch(this.production, this.progress + 1, this.source);
  }
  // Renders the classic dotted-rule notation, e.g.
  //   $Addition => $Number • $Plus $Number (0)
  toString() {
    const rightSide = [];
    const addDot = () => rightSide.push(ansi(5, 1, 2)('\u2022'))
    for(let i = 0; i < this.production.right.length; i++) {
      if(this.progress === i) addDot();
      rightSide.push(this.production.right[i].toString())
    }
    if(this.complete) addDot();
    return this.production.left.toString() + ansi(2, 2, 2)(' => ') + rightSide.join(' ') + ansi(2, 2, 2)(' (' + this.source + ')')
  }
}
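
To make the dotted-rule mechanics concrete, a small sketch with a standalone production:

const addition: Production = { left: $Addition, right: [$Number, $Plus, $Number] };
const item = new PartialMatch(addition, 0, 0); // $Addition => • $Number $Plus $Number (0)
item.nextTokenClass;                           // $Number
const scanned = item.getAdvancedCopy();        // dot advances past $Number
scanned.complete;                              // false -- progress 1 of 3
scanned.getAdvancedCopy().getAdvancedCopy().complete; // true: dot past all three symbols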
function deduplicate<T>(arr: T[], fn: (a: T, b: T) => boolean) {
  const newArr = [];
  for(const item of arr) {
    if(!newArr.map((a) => fn(a, item)).reduce((a, b) => a || b, false)) newArr.push(item);
  }
  return newArr;
}

class SingleEarleyState {
  partialMatches: PartialMatch[] = [];
  constructor() {}
  deduplicate() {
    this.partialMatches = deduplicate(this.partialMatches, (a: PartialMatch, b: PartialMatch) => {
      return a.production === b.production
        && a.progress === b.progress
        && a.source === b.source
    })
  }
  toString() {
    return this.partialMatches.map(pm => pm.toString()).join('\n');
  }
}
// Demo input: the token stream for  45 + 45 * 45 + 45  (positions are dummies).
const tokens: Token[] = [
  new $Number(1, 1, '45'),
  new $Plus(1, 3),
  new $Number(1, 1, '45'),
  new $Times(1, 3),
  new $Number(1, 1, '45'),
  new $Plus(1, 3),
  new $Number(1, 1, '45'),
]
const ps: Production[] = [
  {
    left: $Term, right: [$Addition, $Times, $Addition]
  },
  {
    left: $Addition, right: [$Number, $Plus, $Number]
  },
]
const grammar = new Grammar(ps, $Term);
// solveFor logs each state set as it goes and returns nothing, so there is
// no value worth logging here.
grammar.solveFor(tokens);
// console.log(getFirstTerminalsForTokenClass(ps, $Term))
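
For reference, my reading of the demo: the tokens spell 45 + 45 * 45 + 45, and this grammar only derives it one way:

// $Term     => $Addition $Times $Addition
// $Addition => $Number $Plus $Number
// so the recognizer should accept the stream as (45 + 45) * (45 + 45),
// finishing with a complete $Term item whose source is state 0.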

tokenizer.js 100644

@@ -0,0 +1,97 @@
const chalk = require('chalk');

// const keywords = new Map([
//   ['=', 'EQUALS'],
//   ['(', 'LPAREN'],
//   [')', 'RPAREN'],
//   ['link', 'LINK'],
//   ['const', 'CONST'],
// ]);
// const Tokens = {
//   Keyword(str) { return { type: 'KEYWORD', value: keywords.get(str) } },
//   Newline() { return { type: 'NEWLINE' } },
//   Identifier(str) { return { type: 'IDENTIFIER', value: str } },
//   String(str) { return { type: 'STRING', value: str } }
// }
function tokenize(string) {
  let inString = false;
  let escaping = false;
  let tokens = [];
  let token = '';
  // let line = 1;
  // let col = 1;
  // const newline = () => (col = 1, line ++);
  // const nextColumn = () => line ++;
  const resetToken = () => token = '';
  const addToken = (_token) => {
    const value = _token ?? token;
    // Skip empty accumulations (e.g. whitespace right after another token)
    // so they don't end up as '' entries in the output.
    if (value !== '') tokens.push(value);
    resetToken();
  }
  // // let _line = line;
  // // let _col = col;
  // if(_token) {
  //   token = _token;
  // }
  // if(token.trim() !== '') {
  //   if(keywords.has(token))
  //     tokens.push(Tokens.Keyword(token));
  //   else if (isStringDelim(token[0]))
  //     tokens.push(Tokens.String(token));
  //   else if (token === 'NEWLINE')
  //     tokens.push(Tokens.Newline())
  //   else
  //     tokens.push(Tokens.Identifier(token));
  //   resetToken();
  // }
  const isWhitespace = (char) => [' ', '\n', '\t', '\r'].includes(char);
  const isNewline = (char) => char === '\n';
  const isSingleCharToken = (char) => ['(', ')', '='].includes(char);
  const isStringDelim = (char) => ["'", '"'].includes(char);
  const isEscapeChar = (char) => char === '\\';
  const escape = (char) => (char === 'n' ? '\n'
    : char === 't' ? '\t'
    : char === 'r' ? '\r' : char)
  for (const char of string) {
    if(isNewline(char)) {
      // newline();
      addToken();
      // only add newlines if we've actually started tokens...
      if(tokens.length > 0)
        addToken('NEWLINE')
    } else if (escaping) {
      token += escape(char)
      escaping = false;
    } else if (isStringDelim(char)) {
      token += char;
      inString = !inString;
    } else if (inString) {
      if(isEscapeChar(char)) {
        escaping = true;
      } else {
        token += char
      }
    } else if(isSingleCharToken(char)) {
      addToken();
      addToken(char);
    } else if(isWhitespace(char)) {
      addToken();
    } else {
      token += char;
    }
    // if(!isNewline(char))
    //   nextColumn();
  }
  // NOTE: a trailing token is only flushed by a final newline.
  return tokens;
}
module.exports = tokenize;

// CLI entry point: tokenize disco.disco and dump the token list, guarded so
// that requiring this module elsewhere doesn't trigger the file I/O.
if (require.main === module) {
  const tokens = tokenize(require('fs').readFileSync('disco.disco').toString('utf-8'));
  require('fs').writeFileSync('bytecode.json', JSON.stringify(tokens, null, 2))
}
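
A usage sketch, assuming the empty-token guard added in addToken above:

const tokenize = require('./tokenizer');
tokenize('link log\nlog("Hello")\n');
// => ['link', 'log', 'NEWLINE', 'log', '(', '"Hello"', ')', 'NEWLINE']
// String delimiters stay in the token ('"Hello"'), which is what the
// commented-out Tokens.String branch keyed off of.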

tsconfig.json 100644

@@ -0,0 +1,12 @@
{
  "compilerOptions": {
    "module": "commonjs",
    "target": "ESNext",
    "strictFunctionTypes": true,
    "sourceMap": true,
    "outDir": "out"
  },
  "include": [
    "src/**/*.ts"
  ]
}

utils.js 100644

@@ -0,0 +1,25 @@
const chalk = require('chalk');

module.exports.printTokens = function printTokens(tokens) {
  for(const token of tokens) {
    if(token.type === 'NEWLINE') {
      process.stdout.write(chalk.bgRedBright.black(' LF ') + ' ');
      continue;
    }
    // Make control characters visible before printing.
    const correctedToken = ('value' in token ? token.type + ':' + token.value : token.type)
      .replaceAll('\n', chalk.inverse('LF'))
      .replaceAll('\r', chalk.inverse('CR'))
      .replaceAll('\t', chalk.inverse('TB'))
    process.stdout.write(`${chalk.grey('(')}${chalk.cyanBright(correctedToken)}${chalk.grey(')')} `);
  }
  console.log();
}

module.exports.printProductions = function printProductions(productions) {
  for(const resolvedName in productions) {
    for(const [production, resolver] of productions[resolvedName]) {
      console.log(chalk.green.inverse(resolvedName), chalk.grey('->'), production.map(v => typeof v === 'function' ? chalk.red(v.name) : chalk.green(v)).join(' '));
    }
  }
}

yarn.lock 100644

@@ -0,0 +1,47 @@
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
# yarn lockfile v1


ansi-styles@^4.1.0:
  version "4.3.0"
  resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937"
  integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==
  dependencies:
    color-convert "^2.0.1"

chalk@3:
  version "3.0.0"
  resolved "https://registry.yarnpkg.com/chalk/-/chalk-3.0.0.tgz#3f73c2bf526591f574cc492c51e2456349f844e4"
  integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==
  dependencies:
    ansi-styles "^4.1.0"
    supports-color "^7.1.0"

color-convert@^2.0.1:
  version "2.0.1"
  resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3"
  integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==
  dependencies:
    color-name "~1.1.4"

color-name@~1.1.4:
  version "1.1.4"
  resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
  integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==

has-flag@^4.0.0:
  version "4.0.0"
  resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b"
  integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==

supports-color@^7.1.0:
  version "7.2.0"
  resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da"
  integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==
  dependencies:
    has-flag "^4.0.0"

typescript@^4.6.2:
  version "4.6.2"
  resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.6.2.tgz#fe12d2727b708f4eef40f51598b3398baa9611d4"
  integrity sha512-HM/hFigTBHZhLXshn9sN37H085+hQGeJHJ/X7LpBWLID/fbc2acUMfU+lGD98X81sKP+pFa9f0DZmCwB9GnbAg==