ASTokenizer.pas

File name
C:\Users\Andreas Rejbrand\Documents\Dev\AlgoSim\ASTokenizer.pas
Date exported
Time exported
Formatting processor
TPascalFormattingProcessor
unit ASTokenizer;

{ **************************************************************************** }
{ Rejbrand AlgoSim Tokenizer                                                   }
{ Copyright © 2019 Andreas Rejbrand                                            }
{ https://english.rejbrand.se/                                                 }
{ **************************************************************************** }

{$WARN SYMBOL_PLATFORM OFF}
{$WARN DUPLICATE_CTOR_DTOR OFF}

interface

uses
  SysUtils, Types, Classes, Generics.Defaults, Generics.Collections,
  ASKernelDefs, ASExpression;

type
  TOpKindHelper = record helper for TOpKind
    function ToString: string;
  end;

  TOpAssoc = (osLeft, osRight);

  TOpAssocHelper = record helper for TOpAssoc
    function ToString: string;
  end;

  TPrecGroup = record
    Precedence: Integer;
    Associativity: TOpAssoc;
  end;

  TOperatorHelper = record helper for TOperator
    function Symbol: Char;
    function Kind: TOpKind;
    function Precedence: TPrecGroup;
    function &Function: TASFunctionClass;
    function ListLeft: Boolean; inline;
    function ListRight: Boolean; inline;
    function Collapse: Boolean; inline;
  end;

  TBracket = (bRound, bCurly, bSquare, bInterval, bVector, bFloor, bCeil);

const
  Brackets: array[TBracket, {Closing: }Boolean] of Char =
    (
      (
        '(',
        ')'
      ),
      (
        '{',
        '}'
      ),
      (
        '[',
        ']'
      ),
      (
        '[',
        ']'
      ),
      (
        '❨',
        '❩'
      ),
      (
        '⌊',
        '⌋'
      ),
      (
        '⌈',
        '⌉'
      )
    );
  BracketApplies: array[TBracket] of Boolean =
    (True, False, True, False, False, False, False);
  BracketEmpty: array[TBracket, {Applies: }Boolean] of Boolean =
    (
      (
        False,
        True
      ),
      (
        True,
        True
      ),
      (
        False,
        False
      ),
      (
        False,
        False
      ),
      (
        False,
        False
      ),
      (
        False,
        False
      ),
      (
        False,
        False
      )
    );

type
  TToken = class
  public
  type
    TBracketRec = record
      ID: TBracket;
      Closing: Boolean;
      Applying: Boolean;
      Partner: Integer;
      function Opening: Boolean; inline;
    const
      Precedence: TPrecGroup =
        (
          Precedence: 1;
          Associativity: osLeft;
        );
    end;
    TLiteralRec = record
      Integer: Int64;
      Float: Extended;
      &String: string;
    end;
  var
    Kind: TTokenKind;
    &Operator: TOperator;
    BracketID: Integer;
    Bracket: TBracketRec;
    Literal: TLiteralRec;
    Identifier: string;
    Pos: Integer;
    function ToString: string; override;
    function IsLeftBracket: Boolean; inline;
    function IsLeftOpBracket: Boolean; inline;
    function IsLeftNoOpBracket: Boolean; inline;
    function IsRightBracket: Boolean; inline;
    function IsLiteral: Boolean; inline;
    function IsLiteralOrIdent: Boolean; inline;
    function IsValueStart: Boolean; inline;
    function IsValueEnd: Boolean; inline;
    function Precedence: TPrecGroup;
  private
    constructor CreateOp(AOperator: TOperator; APos, ABracketID: Integer);
    constructor CreateLit(const AInteger: Int64; APos: Integer); overload;
    constructor CreateLit(const AFloat: Extended; APos: Integer); overload;
    constructor CreateLit(const AString: string; APos: Integer); overload;
    constructor CreateBracket(ABracket: TBracket; AClosing, AApplying: Boolean;
      APos: Integer; APartner: Integer = 0);
    constructor CreateComma(APos: Integer);
    constructor CreateIdent(const AName: string; APos: Integer);
    constructor CreateNull(APos: Integer);
  end;

  TTokenizer = record
  strict private
    class var FOpDict: TDictionary<Char, TOperator>;
    class var FInvFS: TFormatSettings;
    class constructor ClassCreate;
    class destructor ClassDestroy;
    class function TryChrToOp(AChar: Char; AExprExpected: Boolean;
      out AOperator: TOperator): Boolean; static; inline;
    class function IsOperator(AChar: Char): Boolean; static; inline;
    class function TryChrToBracket(AChar: Char; out ABracket: TBracket;
      out AClosing: Boolean): Boolean; static;
  public
    class function Tokenize(AExpression: string): TObjectList<TToken>; static;
  end;

implementation

uses
  Math, StrUtils, Character, ASFunctions;

function pg(APrecedence: Integer; AAssociativity: TOpAssoc = osLeft): TPrecGroup; inline;
begin
  Result.Precedence := APrecedence;
  Result.Associativity := AAssociativity;
end;

function Capitalize(const AText: string): string;
var
  i: Integer;
begin
  Result := AText;
  for i := 1 to Result.Length do
    if Result[i].IsLetter then
    begin
      Result[i] := Result[i].ToUpper;
      Break;
    end;
end;

{ TOpKindHelper }

function TOpKindHelper.ToString: string;
begin
  case Self of
    okPrefix:
      Result := 'prefix';
    okPostfix:
      Result := 'postfix';
    okInfix:
      Result := 'infix';
  else
    Result := '';
  end;
end;

{ TOperatorHelper }

function TOperatorHelper.Symbol: Char;
begin
  case Self of
    oUnaryPlus, oPlus, oLooseUnaryPlus:
      Result := '+';
    oUnaryMinus, oMinus, oLooseUnaryMinus:
      Result := '−';
    oAsterisk:
      Result := '*';
    oSolidus:
      Result := '/';
    oCircumflex:
      Result := '^';
    oCDot:
      Result := '⋅';
    oCRing:
      Result := '∘';
    oSqrt:
      Result := '√';
    oCross:
      Result := '×';
    oExclamation:
      Result := '!';
    oQuestion:
      Result := '?';
    oPercent:
      Result := '%';
    oPermille:
      Result := '‰';
    oDegrees:
      Result := '°';
    oPeriod:
      Result := '.';
    oBar:
      Result := '|';
    oAmp:
      Result := '&';
    oAt:
      Result := '@';
    oColon:
      Result := ':';
    oSemicolon:
      Result := ';';
    oLessThan:
      Result := '<';
    oLessEqual:
      Result := '≤';
    oGreaterThan:
      Result := '>';
    oGreaterEqual:
      Result := '≥';
    oEquals:
      Result := '=';
    oNotEquals:
      Result := '≠';
    oApprox:
      Result := '≈';
    oAnd:
      Result := '∧';
    oOr:
      Result := '∨';
    oXor:
      Result := '⊻';
    oNand:
      Result := '⊼';
    oNor:
      Result := '⊽';
    oNot:
      Result := '¬';
    oUnion:
      Result := '∪';
    oIntersection:
      Result := '∩';
    oSetDifference:
      Result := '∖';
    oSymDiff:
      Result := '∆';
    oComplement:
      Result := '∁';
    oIn:
      Result := '∈';
    oNi:
      Result := '∋';
    oNotIn:
      Result := '∉';
    oNotNi:
      Result := '∌';
    oSubset:
      Result := '⊂';
    oSubsetEq:
      Result := '⊆';
    oSubsetNEq:
      Result := '⊊';
    oSuperset:
      Result := '⊃';
    oSupersetEq:
      Result := '⊇';
    oSupersetNEq:
      Result := '⊋';
    oDivides:
      Result := '∣';
    oNDivides:
      Result := '∤';
    oParallel:
      Result := '∥';
    oNParallel:
      Result := '∦';
    oOrthogonal:
      Result := '⟂';
    oCircledPlus:
      Result := '⊕';
    oCircledTimes:
      Result := '⊗';
    oCircledDot:
      Result := '⊙';
    oImpliesRight:
      Result := '⇒';
    oImpliesLeft:
      Result := '⇐';
    oEquivalent:
      Result := '⇔';
    oTo:
      Result := '→';
    oMapsTo:
      Result := '↦';
    oAssign:
      Result := '≔';
    oTilde:
      Result := '∼';
    oNumberSignPrefix, oNumberSignPostfix:
      Result := '#';
    oSection:
      Result := '§';
    oBackslash:
      Result := '\';
  else
    raise ESyntaxException.Create('TOperatorHelper.Symbol: Unknown operator.');
  end;
end;

function TOperatorHelper.Kind: TOpKind;
begin
  case Self of
    oUnaryPlus, oLooseUnaryPlus:
      Result := okPrefix;
    oUnaryMinus, oLooseUnaryMinus:
      Result := okPrefix;
    oPlus:
      Result := okInfix;
    oMinus:
      Result := okInfix;
    oAsterisk:
      Result := okPostfix;
    oSolidus:
      Result := okInfix;
    oCircumflex:
      Result := okInfix;
    oCDot:
      Result := okInfix;
    oCRing:
      Result := okInfix;
    oSqrt:
      Result := okPrefix;
    oCross:
      Result := okInfix;
    oExclamation:
      Result := okPostfix;
    oQuestion:
      Result := okPostfix;
    oPercent:
      Result := okPostfix;
    oPermille:
      Result := okPostfix;
    oDegrees:
      Result := okPostfix;
    oPeriod:
      Result := okInfix;
    oBar:
      Result := okInfix;
    oAmp:
      Result := okPrefix;
    oAt:
      Result := okInfix;
    oColon:
      Result := okInfix;
    oSemicolon:
      Result := okInfix;
    oLessThan:
      Result := okInfix;
    oLessEqual:
      Result := okInfix;
    oGreaterThan:
      Result := okInfix;
    oGreaterEqual:
      Result := okInfix;
    oEquals:
      Result := okInfix;
    oNotEquals:
      Result := okInfix;
    oApprox:
      Result := okInfix;
    oAnd:
      Result := okInfix;
    oOr:
      Result := okInfix;
    oXor:
      Result := okInfix;
    oNand:
      Result := okInfix;
    oNor:
      Result := okInfix;
    oNot:
      Result := okPrefix;
    oUnion:
      Result := okInfix;
    oIntersection:
      Result := okInfix;
    oSetDifference:
      Result := okInfix;
    oSymDiff:
      Result := okInfix;
    oComplement:
      Result := okPrefix;
    oIn:
      Result := okInfix;
    oNi:
      Result := okInfix;
    oNotIn:
      Result := okInfix;
    oNotNi:
      Result := okInfix;
    oSubset:
      Result := okInfix;
    oSubsetEq:
      Result := okInfix;
    oSubsetNEq:
      Result := okInfix;
    oSuperset:
      Result := okInfix;
    oSupersetEq:
      Result := okInfix;
    oSupersetNEq:
      Result := okInfix;
    oDivides:
      Result := okInfix;
    oNDivides:
      Result := okInfix;
    oParallel:
      Result := okInfix;
    oNParallel:
      Result := okInfix;
    oOrthogonal:
      Result := okInfix;
    oCircledPlus:
      Result := okInfix;
    oCircledTimes:
      Result := okInfix;
    oCircledDot:
      Result := okInfix;
    oImpliesRight:
      Result := okInfix;
    oImpliesLeft:
      Result := okInfix;
    oEquivalent:
      Result := okInfix;
    oTo:
      Result := okInfix;
    oMapsTo:
      Result := okInfix;
    oAssign:
      Result := okInfix;
    oTilde:
      Result := okInfix;
    oNumberSignPrefix:
      Result := okPrefix;
    oNumberSignPostfix:
      Result := okPostfix;
    oSection:
      Result := okPrefix;
    oBackslash:
      Result := okInfix;
  else
    raise ESyntaxException.Create('TOperatorHelper.Kind: Unknown operator.');
  end;
end;

function TOperatorHelper.ListLeft: Boolean;
begin
  Result := Self in [oMapsTo, oAssign];
end;

function TOperatorHelper.ListRight: Boolean;
begin
  Result := Self in [oAssign];
end;

function TOperatorHelper.Precedence: TPrecGroup;
begin
  case Self of
    oPeriod:
      Result := pg(1);
    oExclamation, oPercent, oPermille, oDegrees, oAsterisk, oQuestion, oNumberSignPostfix:
      Result := pg(3);
    oAmp, oUnaryPlus, oUnaryMinus, oNot, oComplement, oNumberSignPrefix, oSqrt, oSection:
      Result := pg(4, osRight);
    oCircumflex:
      Result := pg(5, osRight);
    oLooseUnaryMinus, oLooseUnaryPlus:
      Result := pg(6, osRight);
    oCDot, oCross, oSolidus, oCRing, oAt:
      Result := pg(10);
    oCircledTimes, oCircledDot:
      Result := pg(14);
    oPlus, oMinus, oCircledPlus:
      Result := pg(16);
    oUnion, oIntersection, oSetDifference, oSymDiff, oTilde:
      Result := pg(20);
    oIn, oNi, oNotIn, oNotNi:
      Result := pg(22);
    oEquals, oNotEquals, oLessThan, oLessEqual, oGreaterThan, oGreaterEqual,
    oApprox, oSubset, oSubsetEq, oSubsetNEq, oSuperset, oSupersetEq,
    oSupersetNEq, oDivides, oNDivides, oParallel, oNParallel, oOrthogonal, oTo:
      Result := pg(24);
    oAnd, oNand:
      Result := pg(26);
    oOr, oXor, oNor:
      Result := pg(28);
    oImpliesRight, oImpliesLeft, oEquivalent:
      Result := pg(30);
    oColon:
      Result := pg(32);
    oMapsTo:
      Result := pg(36);
    oBar, oBackslash:
      Result := pg(40);
    oAssign:
      Result := pg(90, osRight);
    oSemicolon:
      Result := pg(100);
  else
    raise ESyntaxException.Create('TOperatorHelper.Precedence: Unknown operator.');
  end;
end;

function TOperatorHelper.Collapse: Boolean;
begin
  Result := Self in [oPlus, oCDot, oCross, oAnd, oOr, oSemicolon, oTilde,
    oCircledPlus, oLessThan, oLessEqual, oGreaterThan, oGreaterEqual, oEquals];
end;

function TOperatorHelper.&Function: TASFunctionClass;
begin
  case Self of 
    oUnaryPlus, oLooseUnaryPlus:
      Result := FCN_Identity;
    oUnaryMinus, oLooseUnaryMinus:
      Result := FCN_UnaryMinus;
    oPlus:
      Result := FCN_Add;
    oMinus:
      Result := FCN_Subtract;
    oAsterisk:
      Result := FCN_ConjugateTranspose;
    oSolidus:
      Result := FCN_Divide;
    oCircumflex:
      Result := FCN_Power;
    oCDot:
      Result := FCN_Multiply;
//    oCRing:
//      Result := FCN_Compose;
    oSqrt:
      Result := FCN_sqrt;
    oCross:
      Result := FCN_Cross;
    oExclamation:
      Result := FCN_Factorial;
//    oQuestion: ;
    oPercent:
      Result := FCN_Percent;
    oPermille:
      Result := FCN_Permille;
    oDegrees:
      Result := FCN_Deg;
    oPeriod:
      Result := FCN_AccessMember;
    oBar:
      Result := FCN_InnerProduct;
//    oAmp: ;
    oAt:
      Result := FCN_Apply;
    oColon:
      Result := FCN_MakeMember;
    oSemicolon:
      Result := FCN_Do;
    oLessThan:
      Result := FCN_LessThan;
    oLessEqual:
      Result := FCN_LessThanOrEqualTo;
    oGreaterThan:
      Result := FCN_GreaterThan;
    oGreaterEqual:
      Result := FCN_GreaterThanOrEqualTo;
    oEquals:
      Result := FCN_Equals;
    oNotEquals:
      Result := FCN_NotEquals;
    oApprox:
      Result := FCN_SameValue;
    oAnd:
      Result := FCN_And;
    oOr:
      Result := FCN_Or;
    oXor:
      Result := FCN_Xor;
    oNand:
      Result := FCN_Nand;
    oNor:
      Result := FCN_Nor;
    oNot:
      Result := FCN_Not;    
    oUnion:
      Result := FCN_Union;
    oIntersection:
      Result := FCN_Intersection;
    oSetDifference:
      Result := FCN_SetDifference;
    oSymDiff:
      Result := FCN_SymDiff;
    oComplement:
      Result := FCN_Complement;
    oIn:
      Result := FCN_ElementOf;
    oNi:
      Result := FCN_ContainsAsElement;
    oNotIn:
      Result := FCN_NotElementOf;
    oNotNi:
      Result := FCN_NotContainsAsElement;
    oSubset, oSubsetEq:
      Result := FCN_Subset;
    oSubsetNEq:
      Result := FCN_ProperSubset;
    oSuperset, oSupersetEq:
      Result := FCN_Superset;
    oSupersetNEq:
      Result := FCN_ProperSuperset;
    oDivides:
      Result := FCN_Divides;
    oNDivides:
      Result := FCN_NotDivides;
    oParallel:
      Result := FCN_AreParallel;
    oNParallel:
      Result := FCN_AreNotParallel;
    oOrthogonal:
      Result := FCN_ArePerpendicular;
    oCircledPlus:
      Result := FCN_DirectSum;
    oCircledTimes:
      Result := FCN_OuterProduct;
    oCircledDot:
      Result := FCN_HadamardProduct;
    oImpliesRight:
      Result := FCN_ImpliesRight;
    oImpliesLeft:
      Result := FCN_ImpliesLeft;
    oEquivalent:
      Result := FCN_Equivalent;
//    oTo: ;
    oMapsTo:
      Result := FCN_CreateFunction;
    oAssign:
      Result := FCN_Assign;
    oTilde:
      Result := FCN_Catenate;
    oNumberSignPrefix:
      Result := FCN_Length;
    oNumberSignPostfix:
      Result := FCN_Primorial;
    oSection:
      Result := FCN_Output;
    oBackslash:
      Result := FCN_SetMaxLen;
  else
//    raise ESyntaxException.Create('TOperatorHelper.Function: Unknown operator.');
    Result := FCN_Identity;
  end;
end;

{ TToken }

constructor TToken.CreateOp(AOperator: TOperator; APos, ABracketID: Integer);
begin
  Kind := tkOperator;
  &Operator := AOperator;
  Pos := APos;
  BracketID := ABracketID;
end;

constructor TToken.CreateLit(const AInteger: Int64; APos: Integer);
begin
  Kind := tkIntegerLiteral;
  Literal.Integer := AInteger;
  Pos := APos;
end;

constructor TToken.CreateLit(const AFloat: Extended; APos: Integer);
begin
  Kind := tkFloatLiteral;
  Literal.Float := AFloat;
  Pos := APos;
end;

constructor TToken.CreateLit(const AString: string; APos: Integer);
begin
  Kind := tkStringLiteral;
  Literal.&String := AString;
  Pos := APos;
end;

constructor TToken.CreateNull(APos: Integer);
begin
  Kind := tkImplicitNull;
  Pos := APos;
end;

constructor TToken.CreateBracket(ABracket: TBracket; AClosing, AApplying: Boolean;
  APos: Integer; APartner: Integer);
begin
  Kind := tkBracket;
  Bracket.ID := ABracket;
  Bracket.Closing := AClosing;
  Bracket.Applying := AApplying;
  Bracket.Partner := APartner;
  Pos := APos;
end;

constructor TToken.CreateComma(APos: Integer);
begin
  Kind := tkComma;
  Pos := APos;
end;

constructor TToken.CreateIdent(const AName: string; APos: Integer);
begin
  Kind := tkIdentifier;
  Identifier := AName;
  Pos := APos;
end;

function TToken.ToString: string;
begin
  case Kind of
    tkIntegerLiteral:
      Result := 'Integer literal ' + Literal.Integer.ToString;
    tkFloatLiteral:
      Result := 'Float literal ' + Literal.Float.ToString(DefaultFormatSettings);
    tkStringLiteral:
      Result := 'String literal "' + Literal.&String + '"';
    tkOperator:
      Result := Capitalize(&Operator.Kind.ToString) + ' operator ' + &Operator.Symbol;
    tkBracket:
      Result := 'Bracket ' + Brackets[Bracket.ID, Bracket.Closing] + IfThen(Bracket.Applying, ' applying on left value') + ' – partner at idx ' + Bracket.Partner.ToString;
    tkComma:
      Result := 'Comma';
    tkIdentifier:
      Result := 'Identifier "' + Identifier + '"';
    tkImplicitNull:
      Result := 'Implicit null';
  else
    Result := 'Unknown token';
  end;
end;

function TToken.IsLeftBracket: Boolean;
begin
  Result := (Kind = tkBracket) and Bracket.Opening;
end;

function TToken.IsLeftOpBracket: Boolean;
begin
  Result := IsLeftBracket and (Bracket.ID in [bInterval, bCurly, bVector, bFloor, bCeil])
end;

function TToken.IsLeftNoOpBracket: Boolean;
begin
  Result := IsLeftBracket and (Bracket.ID in [bRound, bSquare]);
end;

function TToken.IsRightBracket: Boolean;
begin
  Result := (Kind = tkBracket) and Bracket.Closing;
end;

function TToken.IsLiteral: Boolean;
begin
  Result := Kind in [tkIntegerLiteral, tkFloatLiteral, tkStringLiteral];
end;

function TToken.IsLiteralOrIdent: Boolean;
begin
  Result := IsLiteral or (Kind = tkIdentifier);
end;

function TToken.IsValueStart: Boolean;
begin
  Result := IsLiteralOrIdent or (IsLeftBracket and not BracketApplies[Bracket.ID]) or
    ((Kind = tkOperator) and (&Operator.Kind = okPrefix));
end;

function TToken.IsValueEnd: Boolean;
begin
  Result := IsLiteralOrIdent or IsRightBracket or
    ((Kind = tkOperator) and (&Operator.Kind = okPostfix));
end;

function TToken.Precedence: TPrecGroup;
begin
  case Kind of
    tkOperator:
      Result := &Operator.Precedence;
    tkBracket:
      Result := Bracket.Precedence;
    tkComma:
      Result := pg(80);
  else
    Result := pg(0);
  end;
end;

{ TTokenizer }

class constructor TTokenizer.ClassCreate;
begin

  FInvFS := TFormatSettings.Invariant;

  FOpDict := TDictionary<Char, TOperator>.Create;

  with FOpDict do
  begin
    Add('+', oPlus);
    Add('-', oMinus);
    Add('−', oMinus);
    Add('*', oAsterisk);
    Add('/', oSolidus);
    Add('⁄', oSolidus);
    Add('∕', oSolidus);
    Add('^', oCircumflex);
    Add('⋅', oCDot);
    Add('∘', oCRing);
    Add('√', oSqrt);
    Add('×', oCross);
    Add('!', oExclamation);
    Add('?', oQuestion);
    Add('%', oPercent);
    Add('‰', oPermille);
    Add('°', oDegrees);
    Add('.', oPeriod);
    Add('|', oBar);
    Add('&', oAmp);
    Add('@', oAt);
    Add(':', oColon);
    Add(';', oSemicolon);
    Add('<', oLessThan);
    Add('≤', oLessEqual);
    Add('>', oGreaterThan);
    Add('≥', oGreaterEqual);
    Add('=', oEquals);
    Add('≠', oNotEquals);
    Add('≈', oApprox);
    Add('∧', oAnd);
    Add('∨', oOr);
    Add('⊻', oXor);
    Add('⊼', oNand);
    Add('⊽', oNor);
    Add('¬', oNot);
    Add('∪', oUnion);
    Add('∩', oIntersection);
    Add('∖', oSetDifference);
    Add('∆', oSymDiff);
    Add('∁', oComplement);
    Add('∈', oIn);
    Add('∋', oNi);
    Add('∉', oNotIn);
    Add('∌', oNotNi);
    Add('⊂', oSubset);
    Add('⊆', oSubsetEq);
    Add('⊊', oSubsetNEq);
    Add('⊃', oSuperset);
    Add('⊇', oSupersetEq);
    Add('⊋', oSupersetNEq);
    Add('∣', oDivides);
    Add('∤', oNDivides);
    Add('∥', oParallel);
    Add('∦', oNParallel);
    Add('⟂', oOrthogonal);
    Add('⊕', oCircledPlus);
    Add('⊗', oCircledTimes);
    Add('⊙', oCircledDot);
    Add('⇒', oImpliesRight);
    Add('⇐', oImpliesLeft);
    Add('⇔', oEquivalent);
    Add('→', oTo);
    Add('↦', oMapsTo);
    Add('≔', oAssign);
    Add('~', oTilde);
    Add('∼', oTilde);
    Add('#', oNumberSignPostfix);
    Add('§', oSection);
    Add('\', oBackslash);
  end;

end;

class destructor TTokenizer.ClassDestroy;
begin
  FreeAndNil(FOpDict);
end;

class function TTokenizer.TryChrToOp(AChar: Char; AExprExpected: Boolean;
  out AOperator: TOperator): Boolean;
begin
  Result := FOpDict.TryGetValue(AChar, AOperator);
  if AExprExpected then
    case AOperator of
      oPlus:
        AOperator := oLooseUnaryPlus;
      oMinus:
        AOperator := oLooseUnaryMinus;
      oNumberSignPostfix:
        AOperator := oNumberSignPrefix;
    end;
end;

class function TTokenizer.IsOperator(AChar: Char): Boolean;
begin
  Result := FOpDict.ContainsKey(AChar);
end;

class function TTokenizer.TryChrToBracket(AChar: Char; out ABracket: TBracket;
  out AClosing: Boolean): Boolean;
var
  LBracket: TBracket;
  LClosing: Boolean;
begin
  for LBracket := Low(TBracket) to High(TBracket) do
    for LClosing := Low(Boolean) to High(Boolean) do
      if AChar = Brackets[LBracket, LClosing] then
      begin
        ABracket := LBracket;
        AClosing := LClosing;
        Exit(True);
      end;
  Result := False;
end;

class function TTokenizer.Tokenize(AExpression: string): TObjectList<TToken>;
const
  DecDigits = ['0'..'9'];
  ASCIINumLitChrs = ['0'..'9', 'A'..'Z', 'a'..'z', '.', '+', '-', '#'];
  AllDigits = ['0'..'9', 'A'..'Z', 'a'..'z'];
  Quote = '"';
  Period = '.';
  Comma = ',';
  MinusSign = #$2212;
  NumberSign = '#';
type
  TBracketRec = record
    Bracket: TBracket;
    Pos: Integer; {argument chr idx}
    Idx: Integer; {result token idx}
  end;

  function br(ABracket: TBracket; APos, AIdx: Integer): TBracketRec; inline;
  begin
    Result.Bracket := ABracket;
    Result.Pos := APos;
    Result.Idx := AIdx;
  end;

var
  i: Integer;
  InIdent: Boolean;
  InNumber: Boolean;
  InString: Boolean;
  TokenStart: Integer;
  C, PrevC: Char;
  S: string;
  SLen: Integer;
  Op: TOperator;
  BracketStack: TStack<TBracketRec>;
  TopBracket: TBracketRec;
  Bracket: TBracket;
  Closing: Boolean;
  NumberSignPos: Integer;
  ExpPos: Integer;
  DecimalSepPos: Integer;
  PrevOp: TOperator;
  PrevBracket: TBracket;
  PrevClosing: Boolean;
  ThisBracket: TToken;
  InstanceKind: TOpKind;
  Applying: Boolean;

  function BracketStackString: string;
  var
    i: Integer;
    BracketRec: TBracketRec;
  begin
    SetLength(Result, BracketStack.Count);
    i := 1;
    for BracketRec in BracketStack do
    begin
      Result[i] := Brackets[BracketRec.Bracket, False];
      Inc(i);
    end;
  end;

  function TokenText(AEnd: Integer): string;
  begin
    Result := Copy(AExpression, TokenStart, AEnd - TokenStart + 1);
  end;

  function TokenTextTrunc(AEnd: Integer; AMaxLen: Integer = 64): string;
  const
    Ellipsis = '...';
  begin
    Result := Copy(AExpression, TokenStart, Min(AEnd - TokenStart + 1, AMaxLen));
    if AEnd - TokenStart + 1 > AMaxLen then
      Result := Result + Ellipsis;
  end;

  procedure CheckAdjLit;
  begin
    if
      (Result.Count >=2)
        and
      Result.Last.IsValueStart
        and
      Result[Result.Count - 2].IsValueEnd
    then
      raise ESyntaxException.CreateFmt(SAdjValues, [Result.Last.Pos]);
  end;

  procedure IdentEndingAt(AEnd: Integer);
  begin
    Result.Add(
      TToken.CreateIdent(TokenText(AEnd), TokenStart)
    );
    CheckAdjLit;
  end;

  procedure NumberEndingAt(AEnd: Integer);
    procedure InvNumLit;
    begin
      raise ESyntaxException.CreateFmt(SInvalidNumLit,
        [TokenTextTrunc(AEnd), TokenStart]);
    end;
  var
    IntVal: Int64;
    FltVal: Extended;
    Base: Integer;
    j: Integer;
    C: Char;
    Digit: Integer;
  const
    MaxBase = 10 + Ord('Z') - Ord('A') + 1;
  begin
    if NumberSignPos = -1 then
      InvNumLit;
    if DecimalSepPos = -1 then
      InvNumLit;
    if NumberSignPos = 0 then
    begin
      if ExpPos = -1 then
        InvNumLit;
      if (DecimalSepPos = 0) and (ExpPos = 0) and TryStrToInt64(TokenText(AEnd), IntVal) then
        Result.Add(TToken.CreateLit(IntVal, TokenStart))
      else if TryStrToFloat(TokenText(AEnd), FltVal, FInvFS) then
        Result.Add(TToken.CreateLit(FltVal, TokenStart))
      else
        InvNumLit;
    end
    else
    begin
      if DecimalSepPos <> 0 then
        InvNumLit;
      if not TryStrToInt(TokenText(NumberSignPos - 1), Base) then
        InvNumLit;
      if not InRange(Base, 2, MaxBase) then
        raise ESyntaxException.CreateFmt(SInvNumBase,
          [Base, TokenTextTrunc(AEnd), TokenStart]);
      IntVal := 0;
      try
        for j := NumberSignPos + 1 to AEnd do
        begin
          C := AExpression[j];
          Digit := 0; // W1036: Delphi has no "noreturn" to decorate InvNumLit with
          if InRange(Ord(C), Ord('0'), Ord('9')) then
            Digit := Ord(C) - Ord('0')
          else if InRange(Ord(C), Ord('A'), Ord('Z')) then
            Digit := 10 + Ord(C) - Ord('A')
          else if InRange(Ord(C), Ord('a'), Ord('z')) then
            Digit := 10 + Ord(C) - Ord('a')
          else
            InvNumLit;
          if Digit >= Base then
            raise ESyntaxException.CreateFmt(SInvDigit,
              [Base, C, TokenTextTrunc(AEnd), TokenStart]);
          {$IFOPT Q-}
            {$DEFINE OverflowCheckingWasDisabled}
            {$Q+}
          {$ELSE}
            {$UNDEF OverflowCheckingWasDisabled}
          {$ENDIF}
          Inc(IntVal, Digit);
          if j < AEnd then
            IntVal := IntVal * Base;
          {$IFDEF OverflowCheckingWasDisabled}
            {$Q-}
            {$UNDEF OverflowCheckingWasDisabled}
          {$ENDIF}
        end;
      except
        on EIntOverflow do
          InvNumLit;
      end;
      Result.Add(TToken.CreateLit(IntVal, TokenStart));
    end;
    CheckAdjLit;
  end;

  function IsNumLitChr(const C: Char): Boolean; inline;
  begin
    Result := CharInSet(C, ASCIINumLitChrs) or (C = MinusSign);
  end;

  function IsSyntaxElement(const C: Char): Boolean; inline;
  var
    DummyBracket: TBracket;
    DummyClosing: Boolean;
  begin
    Result := TryChrToBracket(C, DummyBracket, DummyClosing) or (C = Comma) or
      (C = Quote) or IsOperator(C);
  end;

  procedure PartialEnd;
  begin
    if (Result.Count > 0) and (Result.Last.Kind = tkOperator) and
      (Result.Last.&Operator.Kind <> okPostfix)
    then
    begin
      if Result.Last.&Operator.&Function = FCN_Do then
        Result.Add(TToken.CreateNull(i)) // insert implicit null
      else
        raise ESyntaxException.CreateFmt(SNoRightOperand,
          [Result.Last.&Operator.Symbol, Result.Last.Pos]);
    end;

    if (Result.Count > 0) and (Result.Last.Kind = tkComma) then
      raise ESyntaxException.CreateFmt(SNothingAfterComma,
        [Result.Last.Pos]);
  end;

var
  TokenList: TObjectList<TToken> absolute Result;

  function ExprExpected: Boolean;
  begin
    Result :=
      (TokenList.Count = 0)
        or
      TokenList.Last.IsLeftBracket
        or
      (TokenList.Last.Kind = tkComma)
        or
      ((TokenList.Last.Kind = tkOperator) and (TokenList.Last.&Operator.Kind <> okPostfix))
  end;

  function PeekChar: Char;
  begin
    if i = AExpression.Length then
      Result := #0
    else
      Result := AExpression[i + 1];
  end;

begin

  Result := TObjectList<TToken>.Create;
  try

    BracketStack := TStack<TBracketRec>.Create;
    try

      TokenStart := 0;
      NumberSignPos := 0;
      ExpPos := 0;
      DecimalSepPos := 0;
      InIdent := False;
      InNumber := False;
      InString := False;
      PrevC := #0;
      SLen := 0;
      i := 1;
      while i <= AExpression.Length do
      begin

        C := AExpression[i];

        if InString then
        begin
          if C = Quote then
          begin
            if (i = AExpression.Length) or (AExpression[i + 1] <> Quote) then
            begin
              InString := False;
              SetLength(S, SLen);
              Result.Add(TToken.CreateLit(S, TokenStart));
              CheckAdjLit;
            end
            else
            begin
              if S.Length = SLen then
                SetLength(S, 2 * SLen + 1);
              Inc(SLen);
              S[SLen] := Quote;
              Inc(i);
            end;
          end
          else
          begin
            if S.Length = SLen then
              SetLength(S, 2 * SLen + 1);
            Inc(SLen);
            S[SLen] := C;
          end;
        end

        else if InNumber then
        begin
          if
            not IsNumLitChr(C)
              or
            (
              C.IsInArray(['+', '-', MinusSign])
                and
              not ((PrevC.IsInArray(['e', 'E'])) and (NumberSignPos = 0))
            )
              or
            (
              (C = NumberSign)
                and
              not CharInSet(PeekChar, AllDigits)
            )
          then
          begin
            InNumber := False;
            NumberEndingAt(i - 1);
            Continue{same i};
          end
          else
          begin
            case C of
              NumberSign:
                begin
                  if NumberSignPos = 0 then
                    NumberSignPos := i
                  else
                    NumberSignPos := -1; // signalling invalid numeric literal (with >= two number signs)
                end;
              'E', 'e':
                begin
                  if ExpPos = 0 then
                    ExpPos := i
                  else
                    ExpPos := -1;
                end;
              Period:
                begin
                  if DecimalSepPos = 0 then
                    DecimalSepPos := i
                  else
                    DecimalSepPos := -1;
                end;
              MinusSign:
                AExpression[i] := '-';
            end;
          end;
        end

        else if InIdent then
        begin
          if C.IsWhiteSpace or IsSyntaxElement(C) then
          begin
            InIdent := False;
            IdentEndingAt(i - 1);
            Continue{same i};
          end;
        end

        else if TryChrToBracket(C, Bracket, Closing) then
        begin
          if Closing then
            PartialEnd;
          Applying := not Closing and (Result.Count > 0) and Result.Last.IsValueEnd;

          // Handle 5!(6)
          if Applying and (Result.Last.Kind = tkOperator) and (Result.Last.&Operator.Kind = okPostfix) then
            raise ESyntaxException.CreateFmt(SAdjValues, [i]);

          Result.Add(
            TToken.CreateBracket(Bracket, Closing, Applying, i)
          );
          if Closing then
          begin
            if BracketStack.Count = 0 then
              raise ESyntaxException.CreateFmt(SNegativeBracketLevel, [i]);
            TopBracket := BracketStack.Pop;
            if (Bracket = bSquare) and (TopBracket.Bracket = bInterval) then
              Bracket := bInterval;
            if TopBracket.Bracket <> Bracket then
              raise ESyntaxException.CreateFmt(SWrongBracket,
                [Brackets[Bracket, True], i, Brackets[TopBracket.Bracket, False],
                 TopBracket.Pos]);
            Result[TopBracket.Idx].Bracket.Partner := Result.Count - 1;
            Result.Last.Bracket.Partner := TopBracket.Idx;
            if (Result.Count >= 2) and Result[Result.Count - 2].IsLeftBracket then
            begin
              ThisBracket := Result[Result.Count - 2];
              if not BracketEmpty[ThisBracket.Bracket.ID, ThisBracket.Bracket.Applying] then
                raise ESyntaxException.CreateFmt(SEmptyBracket, [ThisBracket.Pos]);
            end;
          end
          else
          begin
            BracketStack.Push(br(Bracket, i, Result.Count - 1));
            CheckAdjLit;
          end;
        end

        else if C = Comma then
        begin
          if (Result.Count = 0) or Result.Last.IsLeftBracket then
            raise ESyntaxException.CreateFmt(SNothingBeforeComma, [i]);
          PartialEnd;
          Result.Add(TToken.CreateComma(i));
          if BracketStack.Count > 0 then
          begin
            TopBracket := BracketStack.Peek;
            if (TopBracket.Bracket = bSquare) and not Result[TopBracket.Idx].Bracket.Applying then
              Result[TopBracket.Idx].Bracket.ID := bInterval;
          end;
        end

        else if C = Quote then
        begin
          SetLength(S, 1024);
          SLen := 0;
          InString := True;
          TokenStart := i;
        end

        else if CharInSet(C, DecDigits) or ((C = Period) and ExprExpected) then
        begin
          InNumber := True;
          TokenStart := i;
          NumberSignPos := 0;
          ExpPos := 0;
          DecimalSepPos := 0;
        end

        else if TryChrToOp(C, ExprExpected, Op) then
        begin

          if (Op.Kind <> okPrefix) and ExprExpected then
            raise ESyntaxException.CreateFmt(SNoLeftOperand, [Op.Symbol, i]);

          // Handle 2^-1
          if
            (Op in [oLooseUnaryPlus, oLooseUnaryMinus])
              and
            (Result.Count > 0)
              and
            (Result.Last.Kind = tkOperator)
              and
            (
              (Result.Last.&Operator = oCircumflex)
                or
              (Result.Last.&Operator.Kind = okPrefix)
            )
          then
            Op := Pred(Op);

          if BracketStack.Count = 0 then
            Result.Add(TToken.CreateOp(Op, i, -1))
          else
            Result.Add(TToken.CreateOp(Op, i, BracketStack.Peek.Idx));
          CheckAdjLit;

        end

        else if not C.IsWhiteSpace then
        begin
          InIdent := True;
          TokenStart := i;
        end;

        Inc(i);
        PrevC := C;

      end;

      if InString then
      begin
        Inc(TokenStart); // #1: don't include the initial quotation mark
        raise ESyntaxException.CreateFmt(SUnterminatedStringLiteral,
          [TokenTextTrunc(AExpression.Length), TokenStart - 1]); // #2: -1 restores col idx
      end;

      if BracketStack.Count <> 0 then
        raise ESyntaxException.CreateFmt(SOpenBracketAtEOE, [BracketStackString]);

      if InNumber then
        NumberEndingAt(AExpression.Length);

      if InIdent then
        IdentEndingAt(AExpression.Length);

      PartialEnd;

    finally
      BracketStack.Free;
    end;

  except
    Result.Free;
    raise;
  end;

end;

{ TToken.TBracketRec }

function TToken.TBracketRec.Opening: Boolean;
begin
  Result := not Closing;
end;

{ TOpAssocHelper }

function TOpAssocHelper.ToString: string;
begin
  case Self of
    osLeft:
      Result := 'left';
    osRight:
      Result := 'right';
  else
    Result := '';
  end;
end;

end.