19 changed files with 246 additions and 451 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,4 @@
-
+disco
 out
 *.o
 node_modules
--- a/disco.disco
+++ b/disco.disco
@ -1,3 +1,2 @@
 link log
-const a = "a"
+log("Hello World")
 log("hello world")
--- a/BIN
+++ b/BIN
--- a/disco_test.asm
+++ b/disco_test.asm
@ -1,19 +1,15 @@
 bits 64
 default rel
 section .data
-  GSDGYLUR db 97,0
+  EFDNYLFZ db 72,101,108,108,111,32,87,111,114,108,100,0
  STVGNPWI db 104,101,108,108,111,32,119,111,114,108,100,0
 section .text
-  global _main
+  global _start
-_main:
+_start:
  push rbp
  mov rbp, rsp
-  push qword [rel GSDGYLUR]
+  mov rdi, EFDNYLFZ
  mov rdi, STVGNPWI
  call _log
  mov rsp, rbp
  pop rbp
-  mov rax, 0x02000001
+  mov rax, 60
  mov rdi, 0
  syscall
 _log:
@ -28,12 +24,12 @@ _log_loop:
  jmp _log_loop
 _log_loop_end:
  mov rdx, rbx
-  mov rax, 0x02000004
+  mov rax, 1
  mov rdi, 1
  pop rsi
  syscall
  push 10
-  mov rax, 0x02000004
+  mov rax, 1
  mov rdi, 1
  mov rsi, rsp
  mov rdx, 1
--- a/src/compiler.ts
+++ b/src/compiler.ts
@ -23,7 +23,7 @@ const localVariables = new Map();
 const sections = {
  preamble() {
    if(process.platform === 'darwin') {
-      return 'bits 64\ndefault rel\n';
+      return '  global _main\n';
    } else {
      return '';
    }
@ -37,21 +37,10 @@ const sections = {
  },
  text() {
    if(process.platform === 'darwin') {
-      return (
+      return 'section .text\n_main:\n  push rbp\n  mov rbp, rsp\n  '
-        'section .text\n' +
+      + statements.join('\n  ')
-        '  global _main\n' +
+      + '\n  mov rsp, rbp\n  pop rbp\n  mov rax, 0x02000001\n  mov rdi, 0\n  syscall\n'
-        '_main:\n' +
+      + [...linkedLibraries.values()].map(({asmName, asm}) => asmName + ':\n' + asm).join('\n')
        '  push rbp\n' +
        '  mov rbp, rsp\n' +
        statements.map(v => `  ${v}\n`).join('') +
        '  mov rsp, rbp\n' +
        '  pop rbp\n' +
        '  mov rax, 0x02000001\n' +
        '  mov rdi, 0\n' +
        '  syscall\n' + [...linkedLibraries.values()]
          .map(({asmName, asm}) => asmName + ':\n' + asm)
          .join('\n')
      );
    } else {
      return 'section .text\n  global _start\n_start:\n  push rbp\n  mov rbp, rsp\n  '
        + statements.join('\n  ')
@ -117,10 +106,7 @@ function compileVariable(name, value) {
  });
  if(value.type === 'string') {
    const variableName = compileStringLiteral(value.value);
-    if(process.platform === 'darwin')
+    statements.push('push ' + variableName)
      statements.push(`push qword [rel ${variableName}]`);
    else
      statements.push('push ' + variableName);
  } else {
    console.error('dont know how to set a variable to a non string lol')
  }
--- a/src/disco.ts
+++ b/src/disco.ts
@ -3,8 +3,8 @@
 import { readFileSync } from "fs";
 import { compile } from "./compiler";
 import grammar from "./grammar";
 import { tokenize } from "./tokenizer";
 import colorize from "./util/asm/colorize";
 import tokenize from "./util/disco/tokenizer";
 import { printTokens } from "./util/utils";
 console.log();
@ -32,8 +32,8 @@ const asmFile = compile(ast)
 try {
  console.log();
  console.log('=== ASM ===');
  require('fs').writeFileSync('disco_test.asm', asmFile);
  console.log(colorize(asmFile));
  require('fs').writeFileSync('disco_test.asm', asmFile);
  console.log();
  console.log('=== nasm ===');
@ -59,7 +59,6 @@ function ld() {
    require('child_process').execSync([
      'ld', 'disco_test.o',
      '-o', 'disco_test',
      '-no_pie',
      '-macosx_version_min', '11.0',
      '-L', '/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib',
      '-lSystem'
--- a/src/earley.ts
+++ b/src/earley.ts
@ -37,7 +37,6 @@ export class Terminal extends Token { static terminal: true = true };
 // these tokens are special, for formatting and generalization reasons.
 export class $Newline extends Terminal { }
 export class $Whitespace extends Terminal { }
 export class $EOF extends Terminal { }
 function isTerminal(tokenClass: TokenClass): tokenClass is TerminalTokenClass {
  return tokenClass.terminal;
--- a/src/grammar.ts
+++ b/src/grammar.ts
@ -1,32 +1,50 @@
-import { $Newline, Grammar, NonTerminal, Production, Terminal, Token } from "./earley";
+import { Grammar, NonTerminal, Production, Terminal, Token } from "./earley";
 import { AST } from './ast';
-import * as t from './util/disco/tokens';
+
 // Terminal token classes. Each is an empty marker subclass of Terminal; the
 // grammar's productions refer to them on their right-hand sides.
 export class $KeywordLink extends Terminal { }
 export class $KeywordEquals extends Terminal { }
 export class $KeywordLParen extends Terminal { }
 export class $KeywordRParen extends Terminal { }
 export class $KeywordConst extends Terminal { }
 export class $String extends Terminal {}
 export class $Identifier extends Terminal {}
 export class $Newline extends Terminal { }
 // Non-terminal classes. Each appears as the `left` side of at least one
 // production in `ps`; $Program is the symbol passed to the Grammar constructor.
 export class $Program extends NonTerminal { }
 export class $Statement extends NonTerminal { }
 export class $LinkStatement extends NonTerminal { }
 export class $VariableDeclaration extends NonTerminal { }
 export class $Expression extends NonTerminal { }
 export class $InvocationExpression extends NonTerminal { }
 export class $VariableReference extends NonTerminal { }
 const ps: Production[] = [
-  { left: t.$Program, right: [t.$Statement], resolver: (s) => !!s ? AST.Body([s]) : AST.Body([]) },
+  { left: $Program, right: [$Statement], resolver: (s) => !!s ? AST.Body([s]) : AST.Body([]) },
-  { left: t.$Program, right: [t.$Statement, t.$Program], resolver: (s, ss) => !!s ? AST.Body([s, ...ss.value]) : ss},
+  { left: $Program, right: [$Statement, $Program], resolver: (s, ss) => !!s ? AST.Body([s, ...ss.value]) : ss},
-  { left: t.$Statement, right: [$Newline], resolver: () => false },
+  { left: $Statement, right: [$Newline], resolver: () => false },
-  { left: t.$Statement, right: [t.$LinkStatement], resolver: a => a },
+  { left: $Statement, right: [$LinkStatement], resolver: a => a },
-  { left: t.$Statement, right: [t.$VariableDeclaration], resolver: a => a },
+  { left: $Statement, right: [$VariableDeclaration], resolver: a => a },
-  { left: t.$Statement, right: [t.$Expression], resolver: a => a },
+  { left: $Statement, right: [$Expression], resolver: a => a },
-  { left: t.$Expression, right: [t.$String], resolver: (s: t.$String) => AST.String(s.value) },
+  { left: $Expression, right: [$String], resolver: (s: $String) => AST.String(s.value) },
-  { left: t.$Expression, right: [t.$InvocationExpression], resolver: a => a },
+  { left: $Expression, right: [$InvocationExpression], resolver: a => a },
-  { left: t.$Expression, right: [t.$VariableReference], resolver: a => a },
+  { left: $Expression, right: [$VariableReference], resolver: a => a },
-  { left: t.$VariableReference, right: [t.$Identifier], resolver: (identifier: t.$Identifier) => AST.VariableReference(identifier.value) },
+  { left: $VariableReference, right: [$Identifier], resolver: (identifier: $Identifier) => AST.VariableReference(identifier.value) },
-  { left: t.$InvocationExpression, right: [t.$Identifier, t.$KeywordLParen, t.$Expression, t.$KeywordRParen],
+  { left: $InvocationExpression, right: [$Identifier, $KeywordLParen, $Expression, $KeywordRParen],
-    resolver: (identifier: t.$Identifier, _, arg: any, __) => AST.Invocation(identifier.value, arg) },
+    resolver: (identifier: $Identifier, _, arg: any, __) => AST.Invocation(identifier.value, arg) },
-  { left: t.$VariableDeclaration, right: [t.$KeywordConst, t.$Identifier, t.$KeywordEquals, t.$Expression],
+  { left: $VariableDeclaration, right: [$KeywordConst, $Identifier, $KeywordEquals, $Expression],
-    resolver: (_, identifier: t.$Identifier, __, value: any) => AST.Const(identifier.value, value) },
+    resolver: (_, identifier: $Identifier, __, value: any) => AST.Const(identifier.value, value) },
-  { left: t.$LinkStatement, right: [t.$KeywordLink, t.$Identifier], resolver: (_, identifier: t.$Identifier) => AST.Link(identifier.value) },
+  { left: $LinkStatement, right: [$KeywordLink, $Identifier], resolver: (_, identifier: $Identifier) => AST.Link(identifier.value) },
 ]
-const grammar = new Grammar(ps, t.$Program);
+const grammar = new Grammar(ps, $Program);
 export default grammar;
--- a/src/tokenizer.ts
+++ b/src/tokenizer.ts
@ -0,0 +1,85 @@
 import * as chalk from 'chalk';
 import { readFileSync, writeFileSync } from 'fs';
 import { $Identifier, $KeywordConst, $KeywordEquals, $KeywordLink, $KeywordLParen, $KeywordRParen, $Newline, $String } from './grammar';
 const keywords = new Map([
  ['=', $KeywordEquals],
  ['(', $KeywordLParen],
  [')', $KeywordRParen],
  ['link', $KeywordLink],
  ['const', $KeywordConst],
 ]);
 /**
  * Splits disco source text into a flat list of terminal tokens.
  *
  * Entries of `keywords` become their keyword token, quoted text becomes a
  * `$String` (without its surrounding quotes), each line break that follows
  * at least one token becomes a `$Newline`, and anything else becomes an
  * `$Identifier`. Positions are not tracked yet, so every token is created
  * with line 0 and column 0.
  *
  * @param string The source text to tokenize.
  * @returns The tokens in source order.
  */
 export function tokenize(string) {
  // Quote character that opened the string literal currently being read, or
  // null when outside of one. Tracking the opener lets the other quote
  // character appear inside a literal (e.g. "it's") without ending it.
  let openDelim: string | null = null;
  let escaping = false;
  const tokens = [];
  let token = '';

  const isWhitespace = (char) => [' ', '\n', '\t', '\r'].includes(char);
  const isNewline = (char) => char === '\n';
  const isSingleCharToken = (char) => ['(', ')', '='].includes(char);
  const isStringDelim = (char) => ["'", '"'].includes(char);
  const isEscapeChar = (char) => char === '\\';
  const escape = (char) => (char === 'n' ? '\n'
                          : char === 't' ? '\t'
                          : char === 'r' ? '\r' : char)

  // Emits the pending token (or `_token`, when given) and clears the buffer.
  const addToken = (_token?) => {
    if(_token) {
      token = _token;
    }
    if(token.trim() === '') {
      return;
    }
    if(keywords.has(token)) {
      const kwTokenClass = keywords.get(token);
      tokens.push(new kwTokenClass(0, 0, token));
    } else if (isStringDelim(token[0])) {
      // strip the opening and closing quote characters
      tokens.push(new $String(0, 0, token.substring(1, token.length - 1)));
    } else {
      tokens.push(new $Identifier(0, 0, token));
    }
    token = '';
  }

  for (const char of string) {
    if(isNewline(char)) {
      addToken();
      // only add newlines if we've actually started tokens...
      // The newline token is pushed directly rather than through a sentinel
      // string, so an identifier spelled NEWLINE stays an identifier.
      if(tokens.length > 0)
        tokens.push(new $Newline(0, 0, 'NEWLINE'));
    } else if (escaping) {
      token += escape(char)
      escaping = false;
    } else if (openDelim !== null) {
      if(isEscapeChar(char)) {
        escaping = true;
      } else {
        token += char;
        // only the quote character that opened the literal may close it
        if(char === openDelim) {
          openDelim = null;
        }
      }
    } else if (isStringDelim(char)) {
      token += char;
      openDelim = char;
    } else if(isSingleCharToken(char)) {
      addToken();
      addToken(char);
    } else if(isWhitespace(char)) {
      addToken();
    } else {
      token += char;
    }
  }

  // Flush the final token when the input does not end with a newline or a
  // single-character token. An unterminated string literal is still dropped,
  // because its closing quote is missing and the text cannot be unquoted.
  if(openDelim === null) {
    addToken();
  }
  return tokens;
 }
--- a/src/util/asm/grammar.ts
+++ b/src/util/asm/grammar.ts
@ -26,12 +26,8 @@ export default new Grammar([
  { left: t.$Line, right: [t.$Global, t.$Identifier],
    resolver: (_, {value}) => `  ${ansi(...keywordColor).bold('global')} ${ansi(...identifierColor)(value)}` },
  { left: t.$Line, right: [t.$Identifier, t.$Colon], resolver: ({value}) => `${ansi(...identifierColor)(value)}:` },
  { left: t.$Line, right: [t.$Bits, t.$Number], resolver: (_, n) => `${ansi(...keywordColor).bold('bits')} ${ansi(...numberColor)(n.value)}`},
  { left: t.$Line, right: [t.$Default, t.$Rel], resolver: () => `${ansi(...keywordColor).bold('default')} ${ansi(...keywordColor).bold('rel')}`},
  // actual instructions
  { left: t.$Line, right: [t.$Push, t.$DataSize, t.$LBracket, t.$Rel, t.$Identifier, t.$RBracket],
    resolver: (_, size, __, ___, identifier) => `  ${ansi(...instructionColor)('push')} ${size} ${ansi(...pointerColor)('[')}${ansi(...keywordColor).bold('rel')} ${ansi(...identifierColor)(identifier.value)}${ansi(...pointerColor)(']')}` },
  { left: t.$Line, right: [t.$Push, t.$Value], resolver: (_, v) => `  ${ansi(...instructionColor)('push')} ${v}` },
  { left: t.$Line, right: [t.$Pop, t.$Value], resolver: (_, v) => `  ${ansi(...instructionColor)('pop')} ${v}` },
  { left: t.$Line, right: [t.$Cmp, t.$Register, t.$Comma, t.$Value],
@ -56,10 +52,5 @@ export default new Grammar([
  { left: t.$Value, right: [t.$Identifier], resolver: (v) => ansi(...identifierColor)(v.value) },
  { left: t.$CompoundString, right: [t.$Number], resolver: (n) => ansi(...numberColor)(n.value) },
-  { left: t.$CompoundString, right: [t.$Number, t.$Comma, t.$CompoundString], resolver: (n, _, ns) => ansi(...numberColor)(n.value) + ',' + ns },
+  { left: t.$CompoundString, right: [t.$Number, t.$Comma, t.$CompoundString], resolver: (n, _, ns) => ansi(...numberColor)(n.value) + ',' + ns }
  { left: t.$DataSize, right: [t.$Word], resolver: (v) => ansi(...keywordColor).bold(v.value) },
  { left: t.$DataSize, right: [t.$DWord], resolver: (v) => ansi(...keywordColor).bold(v.value) },
  { left: t.$DataSize, right: [t.$QWord], resolver: (v) => ansi(...keywordColor).bold(v.value) },
  { left: t.$DataSize, right: [t.$OWord], resolver: (v) => ansi(...keywordColor).bold(v.value) },
 ], t.$Program);
--- a/src/util/asm/tokenizer.ts
+++ b/src/util/asm/tokenizer.ts
@ -4,58 +4,28 @@ import {
  $Newline,
 } from "./../../earley";
-const asmTokenizer = createTokenizer([
+export default createTokenizer([
-  // whitespaces
+  { match: /^[\r\t ]{1,}$/, token: null },
-  [ /^[\r\t ]{1,}/, null],
+  { match: 'section', token: tokens.$Section },
-  [ /^\n/, $Newline],
+  { match: 'db', token: tokens.$Db },
-
+  { match: 'global', token: tokens.$Global },
-  // keywords
+  { match: '\n', token: $Newline },
-  [ /^section/, tokens.$Section],
+  { match: ':', token: tokens.$Colon },
-  [ /^db/, tokens.$Db],
+  { match: ',', token: tokens.$Comma },
-  [ /^global/, tokens.$Global],
+  { match: '[', token: tokens.$LBracket },
-  [ /^bits/, tokens.$Bits],
+  { match: ']', token: tokens.$RBracket },
-  [ /^default/, tokens.$Default],
+  { match: '-', token: tokens.$Minus },
-  [ /^rel/, tokens.$Rel],
+  { match: 'mov', token: tokens.$Mov },
-  [ /^word/, tokens.$Word],
+  { match: 'push', token: tokens.$Push },
-  [ /^dword/, tokens.$DWord],
+  { match: 'pop', token: tokens.$Pop },
-  [ /^qword/, tokens.$QWord],
+  { match: 'call', token: tokens.$Call },
-  [ /^oword/, tokens.$OWord],
+  { match: 'syscall', token: tokens.$Syscall },
-  
+  { match: 'ret', token: tokens.$Ret },
-  // punctuation
+  { match: 'je', token: tokens.$Je },
-  [ /^:/, tokens.$Colon],
+  { match: 'jmp', token: tokens.$Jmp },
-  [ /^,/, tokens.$Comma],
+  { match: 'cmp', token: tokens.$Cmp },
-  [ /^\[/, tokens.$LBracket],
+  { match: 'inc', token: tokens.$Inc },
-  [ /^\]/, tokens.$RBracket],
+  { match: /^[0-9]{1,}$/, token: tokens.$Number },
-  [ /^-/, tokens.$Minus],
+  { match: /^(rbp|rsp|rax|rcx|rbx|rdx|rdi|rsi|al|bl|cl|dl|ah|bh|ch|dh|ax|bx|cx|dx|eax|ebx|ecx|edx)$/, token: tokens.$Register },
-
+  { match: /^[A-Za-z._][A-Za-z_]{0,}$/, token: tokens.$Identifier },
  // instructions
  [ /^mov/, tokens.$Mov],
  [ /^push/, tokens.$Push],
  [ /^pop/, tokens.$Pop],
  [ /^syscall/, tokens.$Syscall],
  [ /^ret/, tokens.$Ret],
  [ /^je/, tokens.$Je],
  [ /^jmp/, tokens.$Jmp],
  [ /^cmp/, tokens.$Cmp],
  [ /^inc/, tokens.$Inc],
  // pseudo-instructions
  [ /^call/, tokens.$Call],
  // 8 bit general purpose registers...
  [ /^(al|ah|bl|bh|cl|ch|dl|dh)/, tokens.$Register ],
  // 16 bit general purpose registers...
  [ /^(ax|bx|cx|dx)/, tokens.$Register ],
  // 32 bit general purpose registers...
  [ /^(eax|ebx|ecx|edx)/, tokens.$Register ],
  // 64 bit general purpose registers...
  [ /^(rax|rbx|rcx|rdx)/, tokens.$Register ],
  // other registers, idk.
  [ /^(rbp|rsp|rdi|rsi)/, tokens.$Register],
  // user-defined
  [ /^[0-9]{1,}/, tokens.$Number],
  [ /^0x[0-9A-Fa-f]{1,}/, tokens.$Number],
  [ /^[A-Za-z._][A-Za-z_]{0,}/, tokens.$Identifier]
 ])
 export default asmTokenizer;
--- a/src/util/asm/tokens.ts
+++ b/src/util/asm/tokens.ts
@ -21,13 +21,6 @@ export class $RBracket extends Terminal { }
 export class $Comma extends Terminal { }
 export class $Colon extends Terminal { }
 export class $Minus extends Terminal { }
 export class $Bits extends Terminal { }
 export class $Default extends Terminal { }
 export class $Rel extends Terminal { }
 export class $Word extends Terminal { }
 export class $DWord extends Terminal { }
 export class $QWord extends Terminal { }
 export class $OWord extends Terminal { }
 // varying tokens
 export class $Identifier extends Terminal { }
@ -41,4 +34,3 @@ export class $PointerDereference extends NonTerminal { }
 export class $Program extends NonTerminal { }
 export class $CompoundString extends NonTerminal { }
 export class $Value extends NonTerminal { }
 export class $DataSize extends NonTerminal { }
--- a/src/util/asmLogger.ts
+++ b/src/util/asmLogger.ts
@ -0,0 +1,3 @@
 /**
  * Placeholder for assembly logging. The body is currently empty: the `asm`
  * argument is ignored and nothing is returned.
  */
 export function logASM(asm: string): void {
  return;
 }
--- a/src/util/disco/tokenizer.ts
+++ b/src/util/disco/tokenizer.ts
@ -1,9 +0,0 @@
 import { $Newline } from "../../earley";
 import { createTokenizer } from "../generalTokenizer";
 import * as t from './tokens';
 // Token rules for disco source: whitespace is skipped (null token class),
 // line breaks become $Newline and names become $Identifier.
 // Every pattern is anchored with `^`; without the anchor the identifier rule
 // could match text further into the input than the position being tokenized.
 export default createTokenizer([
  [ /^[\r\t ]{1,}/, null],
  [ /^\n/, $Newline],
  [ /^[a-zA-Z][A-Za-z0-9]{0,}/, t.$Identifier],
 ])
--- a/src/util/disco/tokens.ts
+++ b/src/util/disco/tokens.ts
@ -1,18 +0,0 @@
 import { NonTerminal, Terminal } from "../../earley";
 // Terminal token classes (empty marker subclasses of Terminal).
 export class $KeywordLink extends Terminal { }
 export class $KeywordEquals extends Terminal { }
 export class $KeywordLParen extends Terminal { }
 export class $KeywordRParen extends Terminal { }
 export class $KeywordConst extends Terminal { }
 export class $String extends Terminal {}
 export class $Identifier extends Terminal {}
 // Non-terminal classes (empty marker subclasses of NonTerminal).
 export class $Program extends NonTerminal { }
 export class $Statement extends NonTerminal { }
 export class $LinkStatement extends NonTerminal { }
 export class $VariableDeclaration extends NonTerminal { }
 export class $Expression extends NonTerminal { }
 export class $InvocationExpression extends NonTerminal { }
 export class $VariableReference extends NonTerminal { }
--- a/src/util/generalTokenizer.ts
+++ b/src/util/generalTokenizer.ts
@ -1,83 +1,72 @@
-import { Terminal, TerminalTokenClass } from "../earley";
+import { TerminalTokenClass } from "../earley";
-import { Matcher } from "./regex";
+import { inspect } from 'util';
-type TokenMatcher = [ RegExp, TerminalTokenClass ];
+interface TokenMatcher {
-type Index = number;
+  match: RegExp | string,
-
+  token: TerminalTokenClass
 interface Match {
  regex: RegExp;
  length: number;
  tokenClass: TerminalTokenClass;
  matchedString: string;
 }
 /**
  * Finds the first match of `r` in `str`.
  *
  * @param r Either a regular expression or a combinator-style matcher function.
  * @param str The text to search.
  * @returns `[index, matchedText]`, or `[-1, '']` when nothing matches.
  */
 function getFirstMatch(r: RegExp | Matcher, str: string): [Index, string] {
  if (r instanceof RegExp) {
    const matches = str.match(r);
    if(matches === null) return [-1, ''];
    return [matches.index, matches[0]];
  }
  // Matcher functions return every candidate match; use the first one.
  // Previously this path fell through and returned undefined, which made the
  // caller's array destructuring throw.
  const [first] = r(str);
  if(first === undefined) return [-1, ''];
  return [first.offset, first.text];
 }
 /**
  * Curried helper: given the text to scan, returns a function that turns one
  * token matcher entry into a Match record, or null when the entry's pattern
  * does not match the text.
  */
 const getMatchesFromTokenMatcher = (str: string) => {
  return (tokenMatcher: TokenMatcher): Match => {
    const [regex, tokenClass] = tokenMatcher;
    const [position, matchedString] = getFirstMatch(regex, str);
    if (position === -1) {
      return null;
    }
    const found = {
      regex,
      tokenClass,
      length: matchedString.length,
      matchedString,
    };
    return found;
  };
 }
 /**
  * Advances a line/column position across every character of `str`.
  *
  * Each character moves the column forward by one; a newline additionally
  * moves to the next line and resets the column to 1.
  *
  * @returns `[line, column]` after consuming `str`.
  */
 const advanceLC = (l: number, c: number, str: string) => {
  let line = l;
  let column = c;
  Array.from(str).forEach(ch => {
    if (ch === '\n') {
      line += 1;
      column = 1;
    } else {
      column += 1;
    }
  });
  return [line, column];
 }
 export function createTokenizer(tokenMap: TokenMatcher[]) {
  return function tokenize(str: string) {
    let tokens = [];
    let token = '';
    let line = 1, column = 0;
    for(let i = 0; i < str.length; i ++) {
      const char = str[i];
      const lookahead = (i < str.length - 1 ? str[i + 1] : null)
      column++;
      token += char;
-  return function tokenize(str: string, l = 1, c = 1): Terminal[] {
+      for(const {match: matcher, token: tokenClass} of tokenMap) {
        if(typeof matcher === 'string') {
          if(matcher === token) {
            if(tokenClass !== null) {
              tokens.push(new tokenClass(line, column - token.length + 1, token));
            }
            token = '';
          } else {
            // dw about it
          }
        } else {
          // matcher is regex...
          // * note: this only tests if token contains a match, not that it _is_ a match
          if(matcher.test(token)) {
            if(lookahead) {
              if(!matcher.test(token + lookahead)) {
                // the next character would not match, so this must be the match.
                // ! PS: it is possible that even though this would no longer
                // ! match, another matcher could still match more.
                // ! in those cases, we would want to expand on this logic
                // ! to only match if there are no matches for any matcher
                // ! in the lookahead.
                // ! in practice this means tracking all possible non lookahead
                // ! matches, then testing them for their lookahead afterwards
                // ! in another loop, and only tokenizing if you have only one
                // ! option, and that option will fail on the lookahead.
                if(tokenClass !== null) {
                  tokens.push(new tokenClass(line, column - token.length + 1, token));
                }
                token = '';
              } else {
                // the lookahead matches this too, so we should probably hold off
                // on tokenizing it...
              }
            } else {
              if(tokenClass !== null) {
                tokens.push(new tokenClass(line, column - token.length + 1, token));
              }
              token = '';
            }
          }
        }
      }
-    const possibleMatches: Match[] = tokenMap
+      if(char === '\n') {
-      .map(getMatchesFromTokenMatcher(str))
+        line ++;
-      .filter(v => !!v);
+        column = 0;
-
+      }
    const longestLength = possibleMatches
      .map(v => v.length)
      .reduce((a, v) => a > v ? a : v, -Infinity);
    const longestMatches = possibleMatches
      .filter(v => v.length === longestLength);
    console.assert(longestMatches.length > 0, 'No token matches found');
    if(longestMatches.length === 0) process.exit(1);
    const {tokenClass, matchedString} = longestMatches[0];
    const length = matchedString.length;
    const rest = str.substring(length);
    const token = tokenClass ? new tokenClass(l, c, matchedString) : null;
    if(rest === '') return [ token ];
    [l, c] = advanceLC(l, c, str);
    if(tokenClass) {
      return [
        new tokenClass(l, c, matchedString),
        ...tokenize(rest, l, c)
      ]
    }
-    return token ? [token, ...tokenize(rest, l, c)] : tokenize(rest, l, c);
+
    return tokens;
  }
 }
--- a/src/util/regex.ts
+++ b/src/util/regex.ts
@ -1,176 +0,0 @@
 // One match reported by a Matcher.
 type Match = {
  // Index within `original` at which the matched text starts (logMatches
  // prints original[0..offset) before the match).
  offset: number;
  // Number of characters covered; combined matches sum their parts' lengths.
  length: number;
  // The matched text; combined matches concatenate their parts' text.
  text: string;
  // The string the matcher was applied to.
  original: string;
 }
 /** Builds a Match record from its four fields. */
 const match = (offset: number, length: number, text: string, original: string): Match => ({
  offset,
  length,
  text,
  original,
 });
 // A matcher takes a string and returns every match it finds; the matchers in
 // this file return an empty array when nothing matches.
 export type Matcher = (str: string) => Match[]
 /**
  * Creates a matcher that succeeds when the first character of the input
  * equals the first character of `char`. A successful match covers exactly
  * that one character at offset 0. The matcher's `toString()` returns `char`.
  */
 export const matchChar = (char: string): Matcher => {
  const expected = char[0];
  const matchFirst = (input: string) => {
    const head = input[0];
    if (head !== expected) {
      return [];
    }
    return [match(0, 1, head, input)];
  };
  matchFirst.toString = () => char;
  return matchFirst;
 }
 /**
  * Creates a matcher that succeeds when the first character of the input is
  * one of `chars`. A successful match covers exactly that one character at
  * offset 0. The matcher's `toString()` renders the class as `[...]`.
  */
 export const matchCharClass = (chars: string[]): Matcher => {
  const matchFirst = (input: string) => {
    const head = input[0];
    if (!chars.includes(head)) {
      return [];
    }
    return [match(0, 1, head, input)];
  };
  matchFirst.toString = () => `[${chars.join('')}]`;
  return matchFirst;
 }
 /**
  * Joins two matches into one: the smaller offset, the summed length, the
  * concatenated text, and whichever `original` string is longer (`b`'s when
  * they are the same length).
  */
 const combineMatches = (a: Match, b: Match): Match => {
  let source = b.original;
  if (a.original.length > b.original.length) {
    source = a.original;
  }
  const start = Math.min(a.offset, b.offset);
  const span = a.length + b.length;
  const joined = a.text + b.text;
  return match(start, span, joined, source);
 }
 /**
  * Creates a matcher for `matcherA` immediately followed by `matcherB`.
  *
  * For every match of `matcherA`, `matcherB` is run on the input with that
  * match's length removed from the front, and each pairing is combined into
  * a single match. The matcher's `toString()` concatenates both parts.
  */
 export const matchSequence = (matcherA: Matcher, matcherB: Matcher): Matcher => {
  const sequence = (input: string) =>
    matcherA(input).flatMap(head =>
      matcherB(input.substring(head.length)).map(tail => combineMatches(head, tail))
    );
  sequence.toString = () => `${matcherA.toString()}${matcherB.toString()}`;
  return sequence;
 }
 /**
  * Applies `matcher` exactly `n` times in a row, starting at the front of
  * `test`, and returns every resulting combined match.
  *
  * Zero repetitions yield a single empty match; one repetition yields the
  * matcher's own results unchanged.
  */
 const repeatMatcher = (matcher: Matcher, test: string, n: number): Match[] => {
  if (n === 0) {
    return [match(0, 0, '', test)];
  }
  const heads = matcher(test);
  if (n === 1) {
    return heads;
  }
  const combined: Match[] = [];
  for (const head of heads) {
    // continue matching on whatever follows this repetition
    const remainder = head.original.substring(head.length);
    for (const tail of repeatMatcher(matcher, remainder, n - 1)) {
      combined.push(combineMatches(head, tail));
    }
  }
  return combined;
 }
 // this logic sucks lol
 // really you should just keep matching until you
 // have no more characters or you hit the match limit.
 // like this shit increases O by 2 on each nested call...
 // TODO /\ \/ /\ \/ /\ \/ /\ \/ /\ \/ /\ \/ /\ \/ /\
 /**
  * Creates a matcher for between `min` and `max` consecutive repetitions of
  * `matcherA`. The repetition count is also capped at the input's length.
  * Results are listed by increasing repetition count. The matcher's
  * `toString()` renders as `(inner){min,max}`, leaving out a `min` of 0 and
  * an infinite `max`.
  */
 export const matchMany = (matcherA: Matcher, min = 1, max = Infinity): Matcher => {
  const repeated = (input: string) => {
    const upperBound = Math.min(max, input.length);
    const found: Match[] = [];
    let count = min;
    while (count <= upperBound) {
      found.push(...repeatMatcher(matcherA, input, count));
      count++;
    }
    return found;
  };
  repeated.toString = () => {
    const lower = min === 0 ? '' : min;
    const upper = max === Infinity ? '' : max;
    return `(${matcherA.toString()}){${lower},${upper}}`;
  };
  return repeated;
 }
 // variable names regex, theory...
 // Matchers under test, one per row of testMatrix. The trailing comments show
 // what each matcher's toString() produces.
 const matchers = [
  matchChar('a'),                      // a
  matchCharClass(['a', 'b', 'c']),     // [abc]
  // a[abc]
  matchSequence(
    matchChar('a'),
    matchCharClass(['a', 'b', 'c'])
  ),
  // ([abc]){1,}
  matchMany(
    matchCharClass(['a', 'b', 'c'])
  ),
  // ([abc]){1,1}
  matchMany(
    matchCharClass(['a', 'b', 'c']),
    1,
    1
  ),
 ];
 // Input strings, one per column of testMatrix.
 const tests = [
  'a',
  'b',
  'c',
  'd',
  'ab',
  'bc',
  'cd',
  'da',
 ]
 // Clear the terminal before any results are printed.
 console.clear();
 /**
  * Prints each match on its own line, indented by eight spaces, with the
  * matched text in green and the rest of the original string in white.
  */
 const logMatches = (ms: Match[]) => {
  const indent = ' '.repeat(8);
  ms.forEach(({ original, offset, length, text }) => {
    const before = chalk.white(original.substring(0, offset));
    const hit = chalk.green(text);
    const after = chalk.white(original.substring(offset + length));
    console.log(indent + before + hit + after);
  });
 }
 // Shorthand so the expectation table below stays readable.
 const Y = true;
 const N = false;
 // Expected outcomes: testMatrix[i][j] says whether matchers[i] should
 // produce at least one match covering the whole of tests[j].
 const testMatrix = [
  [Y, N, N, N, N, N, N, N],
  [Y, Y, Y, N, N, N, N, N],
  [N, N, N, N, Y, N, N, N],
  [Y, Y, Y, N, Y, Y, N, N],
  [Y, Y, Y, N, N, N, N, N]
 ]
 import * as chalk from 'chalk';
 // Self-test runner: checks every matcher against every test string, counts
 // passes (p) and failures (f), reports each failure with the matches that
 // were returned, and exits with the failure count as the process exit code.
 let i = 0, j = 0, p = 0, f = 0;
 for (i = 0; i < matchers.length; i++) {
  const matcher = matchers[i];
  for (j = 0; j < tests.length; j++) {
    const testString = tests[j];
    // only matches that cover the whole test string count
    const matches = matcher(testString).filter(candidate => candidate.length === testString.length);
    const matched = matches.length > 0;
    if (matched === testMatrix[i][j]) {
      p++;
      continue;
    }
    f++;
    console.log(
      chalk.red('[ FAIL ]'),
      chalk.ansi256(143)('/' + matcher.toString() + '/'),
      'incorrectly returned',
      matches.length,
      'match' + (matches.length !== 1 ? 'es' : '') + ' for',
      testString,
    );
    logMatches(matches);
    console.log('');
  }
 }
 console.log(`${p} test${p !== 1 ? 's' : ''} passed.`);
 console.log(`${f} test${f !== 1 ? 's' : ''} failed.`);
 process.exit(f);
--- a/src/util/tokenTest.ts
+++ b/src/util/tokenTest.ts
@ -6,10 +6,10 @@ class $Plus extends Terminal { }
 class $Newline extends Terminal { }
 const tokenizer = createTokenizer([
-  [ /^[0-9]{1,}$/,       $Number  ],
+  { match: /^[0-9]{1,}$/,       token: $Number  },
-  [ /^[\r\t ]{1,}$/,     null     ],
+  { match: /^[\r\t ]{1,}$/,     token: null     },
-  [ /\n/,                $Newline ],
+  { match: '\n',                token: $Newline },
-  [ /+/,                 $Plus    ],
+  { match: '+',                 token: $Plus    },
 ])
 console.log(tokenizer("5 + \n 6   ").map(v => v.toString()).join('  '));
--- a/todo.md
+++ b/todo.md
@ -1,42 +1,13 @@
 # Todo List
- [x] colorize the assembly output
+[x] colorize the assembly output
- [x] create generalTokenizer to make tokenization generic
+[ ] rewrite disco tokenizer to the new generalTokenizer
- [ ] rewrite disco tokenizer to the new generalTokenizer
+[ ] add number support
- [ ] explore defining non terminals in a grammar with just a string
+[ ] add comment support
-  - possibly using tagged template strings?? 
+[ ] add fixed length array support
- [ ] add an EOF token to earley, and yknow, add it to the tokenizer.
+[ ] organize AST elements into classes
- [ ] add number support in consts
+[ ] better logging of the AST
- [ ] add comment support
+[ ] optionally artificially slow down compilation (for fun)
- [ ] add fixed length array support
+[ ] implement some basic maths operations
- [ ] organize AST elements into classes
+[ ] implement multi-argument invocations
- [ ] better logging of the AST
+[ ] implement return values
 - [ ] optionally artificially slow down compilation (for fun)
 - [ ] implement functions
 - [ ] implement some basic maths operations
 - [ ] implement multi-argument invocations
 - [ ] implement return values
 - [ ] write a regex compiler
 - [ ] write log in disco. create a library for just doing syscalls. the rest can be done in disco
 # Changelog
 - fixed macos compilation to use relative addressing (i think)
 - fixed a bug in the general tokenizer that failed to match some tokens properly.
 ---
 - create generalized tokenizer
 - implement assembly language grammar for syntax highlighting
 - create a vscode extension for syntax highlighting
 ---
 - compile disco code to assembly as POC
 - create an AST for disco code
 - implement earley grammar for disco including:
  - linking library functions
  - calling functions
  - string literals
  - string variables
 - created earley parser