diff --git a/.gitignore b/.gitignore index f7764e6..e967f96 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ disco out -*.o \ No newline at end of file +*.o +node_modules \ No newline at end of file diff --git a/ast.js b/ast.js index 7e88815..93bba94 100644 --- a/ast.js +++ b/ast.js @@ -1,10 +1,10 @@ module.exports = { - Body(statements) { return { type: 'body', value: statements } }, - Link(identifier) { return { type: 'link', value: identifier } }, - Invocation(identifier, ...args) { return { type: 'invo', value: identifier, args } }, - Const(name, value) { return { type: 'const', value, name } }, - Int(n) { return { type: 'int', value: n } }, - String(s) { return { type: 'string', value: s } }, - Variable(name, value) { return { type: 'var', value, name } }, - VariableReference(name) { return { type: 'ref', value: name } } + Body(statements) { return { type: 'body', value: statements } }, + Link(identifier) { return { type: 'link', value: identifier } }, + Invocation(identifier, ...args) { return { type: 'invo', value: identifier, args } }, + Const(name, value) { return { type: 'const', value, name } }, + Int(n) { return { type: 'int', value: n } }, + String(s) { return { type: 'string', value: s } }, + Variable(name, value) { return { type: 'var', value, name } }, + VariableReference(name) { return { type: 'ref', value: name } }, } \ No newline at end of file diff --git a/bytecode.json b/bytecode.json new file mode 100644 index 0000000..6593d15 --- /dev/null +++ b/bytecode.json @@ -0,0 +1,21 @@ +[ + { + "type": "KEYWORD", + "value": "LINK" + }, + { + "type": "IDENTIFIER", + "value": "log1" + }, + { + "type": "NEWLINE" + }, + { + "type": "KEYWORD", + "value": "LINK" + }, + { + "type": "IDENTIFIER", + "value": "log2" + } +] \ No newline at end of file diff --git a/disco.disco b/disco.disco index 990d1f0..3b0117d 100644 --- a/disco.disco +++ b/disco.disco @@ -1,4 +1,18 @@ +link log1 +link log2 + + + link log -log("Hello") -log("World") \ No newline at end of file +const test = "Hello" +const test2 ="Hello2" +const test3 = 'Hello' +const a="5" +log(test) +log("World") +log("Hello\n \"Wor(l)d\"\n\\o/") + + + + diff --git a/linkables/printTime.js b/linkables/printTime.js new file mode 100644 index 0000000..680669f --- /dev/null +++ b/linkables/printTime.js @@ -0,0 +1,7 @@ +module.exports = { + asmName: '_log_time', + asm: `\ + mov rax, 30 + + ret` +} \ No newline at end of file diff --git a/package.json b/package.json index f50ff1a..5246f7e 100644 --- a/package.json +++ b/package.json @@ -7,5 +7,9 @@ "build": "nasm -f elf64 disco.asm -o disco.o && ld disco.o -o disco", "start": "./disco", "dev": "yarn build && yarn start" + }, + "dependencies": { + "chalk": "3", + "typescript": "^4.6.2" } } diff --git a/src/createAST.ts b/src/createAST.ts new file mode 100644 index 0000000..337f73f --- /dev/null +++ b/src/createAST.ts @@ -0,0 +1,264 @@ +import * as chalk from 'chalk'; + +const rgb2ansi = (r: number, g: number, b: number) => r * 36 + g * 6 + b + 16 +const ansi = (r: number, g = r, b = r) => chalk.ansi256(rgb2ansi(r, g, b)); + +abstract class Token { + l: number; + c: number; + static terminal: boolean; + constructor(l: number, c: number) { + this.l = l; + this.c = c; + } + static toString() { + if(this.terminal) { + return ansi(0, 3, 2)('$') + ansi(0, 5, 3)(`${this.name.substring(1)}`) + } else { + return ansi(0, 2, 3)('$') + ansi(0, 3, 5)(`${this.name.substring(1)}`) + } + } + valueToString() { + return this.constructor.toString(); + } + toString() { + return ansi(2)('(') + + this.valueToString() + + ansi(2)(':') + + ansi(3)(this.l) + + ansi(2)(':') + + ansi(3)(this.c) + + ansi(2)(')') + } +} +class NonTerminal extends Token { static terminal: false = false }; +class Terminal extends Token { static terminal: true = true }; + +function isTerminal(tokenClass: TokenClass): tokenClass is TerminalTokenClass { + return tokenClass.terminal; +} + +function isNonTerminal(tokenClass: TokenClass): tokenClass is NonTerminalTokenClass { + return !tokenClass.terminal; +} + +type TerminalTokenClass = { new(...args: any[]) : Terminal, terminal: true } +type NonTerminalTokenClass = { new(...args: any[]) : NonTerminal, terminal: false } +type TokenClass = TerminalTokenClass | NonTerminalTokenClass; + +// class Identifier extends Token { constructor(l, c, value) { super(l, c); this.value = value; } } +class $Number extends Terminal { value: string; constructor(l: number, c: number, value: string) { super(l, c); this.value = value; } } +class $Plus extends Terminal { } +class $Times extends Terminal { } +class $Term extends NonTerminal { } +class $Poop extends NonTerminal { } +class $Addition extends NonTerminal { } + +function getTokenClassFromToken(token: Token): TokenClass { + return token.constructor as TokenClass; +} + +class TimeMachine { + states: T[] = []; + stateConstructor: () => T; + constructor(fn: () => T) { + this.stateConstructor = fn; + this.newState(); + } + newState() { + this.states.push(this.stateConstructor()); + } + get current() { + return this.states[this.states.length - 1]; + } + get previousState() { + console.assert(this.states.length >= 2, 'No previous state to get.'); + return this.stateByIndex(-1); + } + get currentIndex() { + return this.states.length - 1; + } + stateByIndex(n: number) { + if(n >= 0) { + console.assert(n < this.states.length, `State index ${n} does not exist`); + return this.states[n]; + } else { + return this.states[this.states.length - 1 + n]; + } + } +} + +interface Production { + left: TokenClass; + right: TokenClass[]; + // resolver: (...args: any[]) => any; +} + +class Grammar { + private productions: Production[]; + private startingSymbol: NonTerminalTokenClass; + + constructor(productions: Production[], startingSymbol: NonTerminalTokenClass) { + this.productions = productions; + this.startingSymbol = startingSymbol; + } + + solveFor(tokens: Token[]) { + const state = new TimeMachine(() => new SingleEarleyState()); + + const possibleStartingProductions = getProductionsForTokenClass(this.productions, this.startingSymbol) + for(const production of possibleStartingProductions) { + state.current.partialMatches.push(new PartialMatch(production, 0, state.currentIndex)); + } + + // expand all non terminals here again + const expand = (partial: PartialMatch) => { + if(partial.complete) { + const pastPartials = state.stateByIndex(partial.source).partialMatches; + for(const pastPartial of pastPartials) { + if(pastPartial.nextTokenClass === partial.production.left) { + const newPartial = pastPartial.getAdvancedCopy(); + expand(newPartial); + state.current.partialMatches.push(newPartial); + } + } + return; + } + const nextTokenClass = partial.nextTokenClass; + if(isTerminal(nextTokenClass)) return; + const possibleProductions = getProductionsForTokenClass(this.productions, nextTokenClass); + for(const production of possibleProductions) { + const partialMatch = new PartialMatch(production, 0, state.currentIndex); + expand(partialMatch); + state.current.partialMatches.push(partialMatch) + } + } + + state.current.partialMatches.forEach(expand); + + // expand all non terminals here + + console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + this.startingSymbol.toString()); + console.log(state.current.toString(), '\n\n') + + for(const token of tokens) { + state.newState(); + console.log(ansi(3, 3, 0)('s') + ansi(4, 4, 0)(state.currentIndex) + ': ' + token.toString()); + + for(const partialMatch of state.previousState.partialMatches) { + if(partialMatch.complete) continue; + // if our current token falls in line with what we need, then yeah, lets do it. + if(token instanceof partialMatch.nextTokenClass) { + state.current.partialMatches.push(partialMatch.getAdvancedCopy()); + } + } + + console.assert(state.current.partialMatches.length !== 0, ansi(4, 1, 1)('unexpected token ' + token.toString())) + + state.current.partialMatches.forEach(expand); + state.current.deduplicate() + + console.log(state.current.toString(), '\n\n') + } + } +} + +function getProductionsForTokenClass(productions: Production[], tokenClass: TokenClass): Production[] { + return productions.filter((p: Production) => { + return p.left === tokenClass + }) +} + +function getFirstTerminalsForTokenClass(productions: Production[], tokenClass: TokenClass): TerminalTokenClass[] { + if(isTerminal(tokenClass)) return [tokenClass]; + const tokenClasses = getProductionsForTokenClass(productions, tokenClass).map((p: Production) => { + return getFirstTerminalsForTokenClass(productions, p.right[0]) + }).flat(); + const tokenClassesDeduped = [... new Set(tokenClasses)]; + return tokenClassesDeduped; +} + +class PartialMatch { + readonly production: Production; + readonly progress: number = 0; + readonly source: number = 0; + constructor(production: Production, completion: number, source: number) { + this.production = production; + this.progress = completion; + this.source = source; + } + get complete() { + return this.production.right.length === this.progress; + } + get nextTokenClass(): TokenClass { + return this.production.right[this.progress]; + } + // getNextTerminal(productions: Production[]) { + // if() + // return getFirstTerminalsForTokenClass + // } + getAdvancedCopy() { + return new PartialMatch(this.production, this.progress + 1, this.source); + } + toString() { + const rightSide = []; + const addDot = () => rightSide.push(ansi(5, 1, 2)('\u2022')) + for(let i = 0; i < this.production.right.length; i++) { + if(this.progress === i) addDot(); + rightSide.push(this.production.right[i].toString()) + } + if(this.complete) addDot(); + return this.production.left.toString() + ansi(2, 2, 2)(' => ') + rightSide.join(' ') + ansi(2, 2, 2)(' (' + this.source + ')') + } +} + +function deduplicate(arr: T[], fn: (a: T, b: T) => boolean) { + const newArr = []; + for(const item of arr) { + if(!newArr.map((a) => fn(a, item)).reduce((a, b) => a || b, false)) newArr.push(item); + } + return newArr; +} + +class SingleEarleyState { + partialMatches: PartialMatch[] = []; + constructor() {} + + deduplicate() { + this.partialMatches = deduplicate(this.partialMatches, (a: PartialMatch, b: PartialMatch) => { + return a.production === b.production + && a.progress === b.progress + && a.source === b.source + }) + } + + toString() { + return this.partialMatches.map(pm => pm.toString()).join('\n'); + } +} + +const tokens: Token[] = [ + new $Number(1, 1, '45'), + new $Plus(1, 3), + new $Number(1, 1, '45'), + new $Times(1, 3), + new $Number(1, 1, '45'), + new $Plus(1, 3), + new $Number(1, 1, '45'), +] + + +const ps: Production[] = [ + { + left: $Term, right: [$Addition, $Times, $Addition] + }, + { + left: $Addition, right: [$Number, $Plus, $Number] + }, +] + +const grammar = new Grammar(ps, $Term); + +console.log(grammar.solveFor(tokens)); + +// console.log(getFirstTerminalsForTokenClass(ps, $Term)) diff --git a/tokenizer.js b/tokenizer.js new file mode 100644 index 0000000..7dd9676 --- /dev/null +++ b/tokenizer.js @@ -0,0 +1,97 @@ +const chalk = require('chalk'); + +// const keywords = new Map([ +// ['=', 'EQUALS'], +// ['(', 'LPAREN'], +// [')', 'RPAREN'], +// ['link', 'LINK'], +// ['const', 'CONST'], +// ]); + +// const Tokens = { +// Keyword(str) { return { type: 'KEYWORD', value: keywords.get(str) } }, +// Newline() { return { type: 'NEWLINE' } }, +// Identifier(str) { return { type: 'IDENTIFIER', value: str } }, +// String(str) { return { type: 'STRING', value: str } } +// } + +function tokenize(string) { + let inString = false; + let escaping = false; + let tokens = []; + let token = ''; + // let line = 1; + // let col = 1; + // const newline = () => (col = 1, line ++); + // const nextColumn = () => line ++; + const resetToken = () => token = ''; + const addToken = (_token) => { + tokens.push(_token ?? token); + resetToken(); + } + // // let _line = line; + // // let _col = col; + // if(_token) { + // token = _token; + // } + // if(token.trim() !== '') { + // if(keywords.has(token)) + // tokens.push(Tokens.Keyword(token)); + // else if (isStringDelim(token[0])) + // tokens.push(Tokens.String(token)); + // else if (token === 'NEWLINE') + // tokens.push(Tokens.Newline()) + // else + // tokens.push(Tokens.Identifier(token)); + // resetToken(); + // } + const isWhitespace = (char) => [' ', '\n', '\t', '\r'].includes(char); + const isNewline = (char) => char === '\n'; + const isSingleCharToken = (char) => ['(', ')', '='].includes(char); + const isStringDelim = (char) => ["'", '"'].includes(char); + const isEscapeChar = (char) => char === '\\'; + const escape = (char) => (char === 'n' ? '\n' + : char === 't' ? '\t' + : char === 'r' ? '\r' : char) + + for (const char of string) { + if(isNewline(char)) { + // newline(); + addToken(); + // only add newlines if we've actually started tokens... + if(tokens.length > 0) + addToken('NEWLINE') + } else if (escaping) { + token += escape(char) + escaping = false; + } else if (isStringDelim(char)) { + token += char; + inString = !inString; + } else if (inString) { + if(isEscapeChar(char)) { + escaping = true; + } else { + token += char + } + } else if(isSingleCharToken(char)) { + addToken(); + addToken(char); + } else if(isWhitespace(char)) { + addToken(); + } else { + token += char; + } + // if(!isNewline(char)) + // nextColumn(); + } + + return tokens; +} + +module.exports = tokenize; + +const tokens = tokenize(require('fs').readFileSync('disco.disco').toString('utf-8')); + + +require('fs').writeFileSync('bytecode.json', JSON.stringify(tokens, null, 2)) + diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..a17dd70 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,12 @@ +{ + "compilerOptions": { + "module": "commonjs", + "target": "ESNext", + "strictFunctionTypes": true, + "sourceMap": true, + "outDir": "out" + }, + "include": [ + "src/**/*.ts" + ] +} \ No newline at end of file diff --git a/utils.js b/utils.js new file mode 100644 index 0000000..1df3c22 --- /dev/null +++ b/utils.js @@ -0,0 +1,25 @@ +const chalk = require('chalk'); + +module.exports.printTokens = function printTokens(tokens) { + for(const token of tokens) { + if(token.type === 'NEWLINE') { + process.stdout.write(chalk.bgRedBright.black(' LF ') + ' '); + continue; + } + const correctedToken = ('value' in token ? token.type + ':' + token.value : token.type) + .replaceAll('\n', chalk.inverse('LF')) + .replaceAll('\r', chalk.inverse('CR')) + .replaceAll('\t', chalk.inverse('TB')) + .replaceAll('\n', chalk.inverse('LF')) + process.stdout.write(`${chalk.grey('(')}${chalk.cyanBright(correctedToken)}${chalk.grey(')')} `); + } + console.log(); +} + +module.exports.printProductions = function printProductions(productions) { + for(const resolvedName in productions) { + for(const [production, resolver] of productions[resolvedName]) { + console.log(chalk.green.inverse(resolvedName), chalk.grey('->'), production.map(v => typeof v === 'function' ? chalk.red(v.name) : chalk.green(v)).join(' ')); + } + } +} \ No newline at end of file diff --git a/yarn.lock b/yarn.lock new file mode 100644 index 0000000..5f9b686 --- /dev/null +++ b/yarn.lock @@ -0,0 +1,47 @@ +# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY. +# yarn lockfile v1 + + +ansi-styles@^4.1.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/ansi-styles/-/ansi-styles-4.3.0.tgz#edd803628ae71c04c85ae7a0906edad34b648937" + integrity sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg== + dependencies: + color-convert "^2.0.1" + +chalk@3: + version "3.0.0" + resolved "https://registry.yarnpkg.com/chalk/-/chalk-3.0.0.tgz#3f73c2bf526591f574cc492c51e2456349f844e4" + integrity sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +color-convert@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-2.0.1.tgz#72d3a68d598c9bdb3af2ad1e84f21d896abd4de3" + integrity sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ== + dependencies: + color-name "~1.1.4" + +color-name@~1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" + integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== + +has-flag@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/has-flag/-/has-flag-4.0.0.tgz#944771fd9c81c81265c4d6941860da06bb59479b" + integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== + +supports-color@^7.1.0: + version "7.2.0" + resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-7.2.0.tgz#1b7dcdcb32b8138801b3e478ba6a51caa89648da" + integrity sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw== + dependencies: + has-flag "^4.0.0" + +typescript@^4.6.2: + version "4.6.2" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-4.6.2.tgz#fe12d2727b708f4eef40f51598b3398baa9611d4" + integrity sha512-HM/hFigTBHZhLXshn9sN37H085+hQGeJHJ/X7LpBWLID/fbc2acUMfU+lGD98X81sKP+pFa9f0DZmCwB9GnbAg==