unit ASTokenizer;
{$WARN SYMBOL_PLATFORM OFF}
{$WARN DUPLICATE_CTOR_DTOR OFF}
interface
uses
SysUtils, Types, Classes, Generics.Defaults, Generics.Collections,
ASKernelDefs, ASExpression;
type
TOpKindHelper = record helper for TOpKind
function ToString: string;
end;
TOpAssoc = (osLeft, osRight);
TOpAssocHelper = record helper for TOpAssoc
function ToString: string;
end;
TPrecGroup = record
Precedence: Integer;
Associativity: TOpAssoc;
end;
TOperatorHelper = record helper for TOperator
function Symbol: Char;
function Kind: TOpKind;
function Precedence: TPrecGroup;
function &Function: TASFunctionClass;
function ListLeft: Boolean; inline;
function ListRight: Boolean; inline;
function Collapse: Boolean; inline;
end;
TBracket = (bRound, bCurly, bSquare, bInterval, bVector, bFloor, bCeil);
const
Brackets: array[TBracket, Boolean] of Char =
(
(
'(',
')'
),
(
'{',
'}'
),
(
'[',
']'
),
(
'[',
']'
),
(
'❨',
'❩'
),
(
'⌊',
'⌋'
),
(
'⌈',
'⌉'
)
);
BracketApplies: array[TBracket] of Boolean =
(True, False, True, False, False, False, False);
BracketEmpty: array[TBracket, Boolean] of Boolean =
(
(
False,
True
),
(
True,
True
),
(
False,
False
),
(
False,
False
),
(
False,
False
),
(
False,
False
),
(
False,
False
)
);
type
TToken = class
public
type
TBracketRec = record
ID: TBracket;
Closing: Boolean;
Applying: Boolean;
Partner: Integer;
function Opening: Boolean; inline;
const
Precedence: TPrecGroup =
(
Precedence: 1;
Associativity: osLeft;
);
end;
TLiteralRec = record
Integer: Int64;
Float: Extended;
&String: string;
end;
var
Kind: TTokenKind;
&Operator: TOperator;
BracketID: Integer;
Bracket: TBracketRec;
Literal: TLiteralRec;
Identifier: string;
Pos: Integer;
function ToString: string; override;
function IsLeftBracket: Boolean; inline;
function IsLeftOpBracket: Boolean; inline;
function IsLeftNoOpBracket: Boolean; inline;
function IsRightBracket: Boolean; inline;
function IsLiteral: Boolean; inline;
function IsLiteralOrIdent: Boolean; inline;
function IsValueStart: Boolean; inline;
function IsValueEnd: Boolean; inline;
function Precedence: TPrecGroup;
private
constructor CreateOp(AOperator: TOperator; APos, ABracketID: Integer);
constructor CreateLit(const AInteger: Int64; APos: Integer); overload;
constructor CreateLit(const AFloat: Extended; APos: Integer); overload;
constructor CreateLit(const AString: string; APos: Integer); overload;
constructor CreateBracket(ABracket: TBracket; AClosing, AApplying: Boolean;
APos: Integer; APartner: Integer = 0);
constructor CreateComma(APos: Integer);
constructor CreateIdent(const AName: string; APos: Integer);
constructor CreateNull(APos: Integer);
end;
TTokenizer = record
strict private
class var FOpDict: TDictionary<Char, TOperator>;
class var FInvFS: TFormatSettings;
class constructor ClassCreate;
class destructor ClassDestroy;
class function TryChrToOp(AChar: Char; AExprExpected: Boolean;
out AOperator: TOperator): Boolean; static; inline;
class function IsOperator(AChar: Char): Boolean; static; inline;
class function TryChrToBracket(AChar: Char; out ABracket: TBracket;
out AClosing: Boolean): Boolean; static;
public
class function Tokenize(AExpression: string): TObjectList<TToken>; static;
end;
implementation
uses
Math, StrUtils, Character, ASFunctions;
function pg(APrecedence: Integer; AAssociativity: TOpAssoc = osLeft): TPrecGroup; inline;
begin
Result.Precedence := APrecedence;
Result.Associativity := AAssociativity;
end;
function Capitalize(const AText: string): string;
var
i: Integer;
begin
Result := AText;
for i := 1 to Result.Length do
if Result[i].IsLetter then
begin
Result[i] := Result[i].ToUpper;
Break;
end;
end;
function TOpKindHelper.ToString: string;
begin
case Self of
okPrefix:
Result := 'prefix';
okPostfix:
Result := 'postfix';
okInfix:
Result := 'infix';
else
Result := '';
end;
end;
function TOperatorHelper.Symbol: Char;
begin
case Self of
oUnaryPlus, oPlus, oLooseUnaryPlus:
Result := '+';
oUnaryMinus, oMinus, oLooseUnaryMinus:
Result := '−';
oAsterisk:
Result := '*';
oSolidus:
Result := '/';
oCircumflex:
Result := '^';
oCDot:
Result := '⋅';
oCRing:
Result := '∘';
oSqrt:
Result := '√';
oCross:
Result := '×';
oExclamation:
Result := '!';
oQuestion:
Result := '?';
oPercent:
Result := '%';
oPermille:
Result := '‰';
oDegrees:
Result := '°';
oPeriod:
Result := '.';
oBar:
Result := '|';
oAmp:
Result := '&';
oAt:
Result := '@';
oColon:
Result := ':';
oSemicolon:
Result := ';';
oLessThan:
Result := '<';
oLessEqual:
Result := '≤';
oGreaterThan:
Result := '>';
oGreaterEqual:
Result := '≥';
oEquals:
Result := '=';
oNotEquals:
Result := '≠';
oApprox:
Result := '≈';
oAnd:
Result := '∧';
oOr:
Result := '∨';
oXor:
Result := '⊻';
oNand:
Result := '⊼';
oNor:
Result := '⊽';
oNot:
Result := '¬';
oUnion:
Result := '∪';
oIntersection:
Result := '∩';
oSetDifference:
Result := '∖';
oSymDiff:
Result := '∆';
oComplement:
Result := '∁';
oIn:
Result := '∈';
oNi:
Result := '∋';
oNotIn:
Result := '∉';
oNotNi:
Result := '∌';
oSubset:
Result := '⊂';
oSubsetEq:
Result := '⊆';
oSubsetNEq:
Result := '⊊';
oSuperset:
Result := '⊃';
oSupersetEq:
Result := '⊇';
oSupersetNEq:
Result := '⊋';
oDivides:
Result := '∣';
oNDivides:
Result := '∤';
oParallel:
Result := '∥';
oNParallel:
Result := '∦';
oOrthogonal:
Result := '⟂';
oCircledPlus:
Result := '⊕';
oCircledTimes:
Result := '⊗';
oCircledDot:
Result := '⊙';
oImpliesRight:
Result := '⇒';
oImpliesLeft:
Result := '⇐';
oEquivalent:
Result := '⇔';
oTo:
Result := '→';
oMapsTo:
Result := '↦';
oAssign:
Result := '≔';
oTilde:
Result := '∼';
oNumberSignPrefix, oNumberSignPostfix:
Result := '#';
oSection:
Result := '§';
oBackslash:
Result := '\';
else
raise ESyntaxException.Create('TOperatorHelper.Symbol: Unknown operator.');
end;
end;
function TOperatorHelper.Kind: TOpKind;
begin
case Self of
oUnaryPlus, oLooseUnaryPlus:
Result := okPrefix;
oUnaryMinus, oLooseUnaryMinus:
Result := okPrefix;
oPlus:
Result := okInfix;
oMinus:
Result := okInfix;
oAsterisk:
Result := okPostfix;
oSolidus:
Result := okInfix;
oCircumflex:
Result := okInfix;
oCDot:
Result := okInfix;
oCRing:
Result := okInfix;
oSqrt:
Result := okPrefix;
oCross:
Result := okInfix;
oExclamation:
Result := okPostfix;
oQuestion:
Result := okPostfix;
oPercent:
Result := okPostfix;
oPermille:
Result := okPostfix;
oDegrees:
Result := okPostfix;
oPeriod:
Result := okInfix;
oBar:
Result := okInfix;
oAmp:
Result := okPrefix;
oAt:
Result := okInfix;
oColon:
Result := okInfix;
oSemicolon:
Result := okInfix;
oLessThan:
Result := okInfix;
oLessEqual:
Result := okInfix;
oGreaterThan:
Result := okInfix;
oGreaterEqual:
Result := okInfix;
oEquals:
Result := okInfix;
oNotEquals:
Result := okInfix;
oApprox:
Result := okInfix;
oAnd:
Result := okInfix;
oOr:
Result := okInfix;
oXor:
Result := okInfix;
oNand:
Result := okInfix;
oNor:
Result := okInfix;
oNot:
Result := okPrefix;
oUnion:
Result := okInfix;
oIntersection:
Result := okInfix;
oSetDifference:
Result := okInfix;
oSymDiff:
Result := okInfix;
oComplement:
Result := okPrefix;
oIn:
Result := okInfix;
oNi:
Result := okInfix;
oNotIn:
Result := okInfix;
oNotNi:
Result := okInfix;
oSubset:
Result := okInfix;
oSubsetEq:
Result := okInfix;
oSubsetNEq:
Result := okInfix;
oSuperset:
Result := okInfix;
oSupersetEq:
Result := okInfix;
oSupersetNEq:
Result := okInfix;
oDivides:
Result := okInfix;
oNDivides:
Result := okInfix;
oParallel:
Result := okInfix;
oNParallel:
Result := okInfix;
oOrthogonal:
Result := okInfix;
oCircledPlus:
Result := okInfix;
oCircledTimes:
Result := okInfix;
oCircledDot:
Result := okInfix;
oImpliesRight:
Result := okInfix;
oImpliesLeft:
Result := okInfix;
oEquivalent:
Result := okInfix;
oTo:
Result := okInfix;
oMapsTo:
Result := okInfix;
oAssign:
Result := okInfix;
oTilde:
Result := okInfix;
oNumberSignPrefix:
Result := okPrefix;
oNumberSignPostfix:
Result := okPostfix;
oSection:
Result := okPrefix;
oBackslash:
Result := okInfix;
else
raise ESyntaxException.Create('TOperatorHelper.Kind: Unknown operator.');
end;
end;
function TOperatorHelper.ListLeft: Boolean;
begin
Result := Self in [oMapsTo, oAssign];
end;
function TOperatorHelper.ListRight: Boolean;
begin
Result := Self in [oAssign];
end;
function TOperatorHelper.Precedence: TPrecGroup;
begin
case Self of
oPeriod:
Result := pg(1);
oExclamation, oPercent, oPermille, oDegrees, oAsterisk, oQuestion, oNumberSignPostfix:
Result := pg(3);
oAmp, oUnaryPlus, oUnaryMinus, oNot, oComplement, oNumberSignPrefix, oSqrt, oSection:
Result := pg(4, osRight);
oCircumflex:
Result := pg(5, osRight);
oLooseUnaryMinus, oLooseUnaryPlus:
Result := pg(6, osRight);
oCDot, oCross, oSolidus, oCRing, oAt:
Result := pg(10);
oCircledTimes, oCircledDot:
Result := pg(14);
oPlus, oMinus, oCircledPlus:
Result := pg(16);
oUnion, oIntersection, oSetDifference, oSymDiff, oTilde:
Result := pg(20);
oIn, oNi, oNotIn, oNotNi:
Result := pg(22);
oEquals, oNotEquals, oLessThan, oLessEqual, oGreaterThan, oGreaterEqual,
oApprox, oSubset, oSubsetEq, oSubsetNEq, oSuperset, oSupersetEq,
oSupersetNEq, oDivides, oNDivides, oParallel, oNParallel, oOrthogonal, oTo:
Result := pg(24);
oAnd, oNand:
Result := pg(26);
oOr, oXor, oNor:
Result := pg(28);
oImpliesRight, oImpliesLeft, oEquivalent:
Result := pg(30);
oColon:
Result := pg(32);
oMapsTo:
Result := pg(36);
oBar, oBackslash:
Result := pg(40);
oAssign:
Result := pg(90, osRight);
oSemicolon:
Result := pg(100);
else
raise ESyntaxException.Create('TOperatorHelper.Precedence: Unknown operator.');
end;
end;
function TOperatorHelper.Collapse: Boolean;
begin
Result := Self in [oPlus, oCDot, oCross, oAnd, oOr, oSemicolon, oTilde,
oCircledPlus, oLessThan, oLessEqual, oGreaterThan, oGreaterEqual, oEquals];
end;
function TOperatorHelper.&Function: TASFunctionClass;
begin
case Self of
oUnaryPlus, oLooseUnaryPlus:
Result := FCN_Identity;
oUnaryMinus, oLooseUnaryMinus:
Result := FCN_UnaryMinus;
oPlus:
Result := FCN_Add;
oMinus:
Result := FCN_Subtract;
oAsterisk:
Result := FCN_ConjugateTranspose;
oSolidus:
Result := FCN_Divide;
oCircumflex:
Result := FCN_Power;
oCDot:
Result := FCN_Multiply;
oSqrt:
Result := FCN_sqrt;
oCross:
Result := FCN_Cross;
oExclamation:
Result := FCN_Factorial;
oPercent:
Result := FCN_Percent;
oPermille:
Result := FCN_Permille;
oDegrees:
Result := FCN_Deg;
oPeriod:
Result := FCN_AccessMember;
oBar:
Result := FCN_InnerProduct;
oAt:
Result := FCN_Apply;
oColon:
Result := FCN_MakeMember;
oSemicolon:
Result := FCN_Do;
oLessThan:
Result := FCN_LessThan;
oLessEqual:
Result := FCN_LessThanOrEqualTo;
oGreaterThan:
Result := FCN_GreaterThan;
oGreaterEqual:
Result := FCN_GreaterThanOrEqualTo;
oEquals:
Result := FCN_Equals;
oNotEquals:
Result := FCN_NotEquals;
oApprox:
Result := FCN_SameValue;
oAnd:
Result := FCN_And;
oOr:
Result := FCN_Or;
oXor:
Result := FCN_Xor;
oNand:
Result := FCN_Nand;
oNor:
Result := FCN_Nor;
oNot:
Result := FCN_Not;
oUnion:
Result := FCN_Union;
oIntersection:
Result := FCN_Intersection;
oSetDifference:
Result := FCN_SetDifference;
oSymDiff:
Result := FCN_SymDiff;
oComplement:
Result := FCN_Complement;
oIn:
Result := FCN_ElementOf;
oNi:
Result := FCN_ContainsAsElement;
oNotIn:
Result := FCN_NotElementOf;
oNotNi:
Result := FCN_NotContainsAsElement;
oSubset, oSubsetEq:
Result := FCN_Subset;
oSubsetNEq:
Result := FCN_ProperSubset;
oSuperset, oSupersetEq:
Result := FCN_Superset;
oSupersetNEq:
Result := FCN_ProperSuperset;
oDivides:
Result := FCN_Divides;
oNDivides:
Result := FCN_NotDivides;
oParallel:
Result := FCN_AreParallel;
oNParallel:
Result := FCN_AreNotParallel;
oOrthogonal:
Result := FCN_ArePerpendicular;
oCircledPlus:
Result := FCN_DirectSum;
oCircledTimes:
Result := FCN_OuterProduct;
oCircledDot:
Result := FCN_HadamardProduct;
oImpliesRight:
Result := FCN_ImpliesRight;
oImpliesLeft:
Result := FCN_ImpliesLeft;
oEquivalent:
Result := FCN_Equivalent;
oMapsTo:
Result := FCN_CreateFunction;
oAssign:
Result := FCN_Assign;
oTilde:
Result := FCN_Catenate;
oNumberSignPrefix:
Result := FCN_Length;
oNumberSignPostfix:
Result := FCN_Primorial;
oSection:
Result := FCN_Output;
oBackslash:
Result := FCN_SetMaxLen;
else
Result := FCN_Identity;
end;
end;
constructor TToken.CreateOp(AOperator: TOperator; APos, ABracketID: Integer);
begin
Kind := tkOperator;
&Operator := AOperator;
Pos := APos;
BracketID := ABracketID;
end;
constructor TToken.CreateLit(const AInteger: Int64; APos: Integer);
begin
Kind := tkIntegerLiteral;
Literal.Integer := AInteger;
Pos := APos;
end;
constructor TToken.CreateLit(const AFloat: Extended; APos: Integer);
begin
Kind := tkFloatLiteral;
Literal.Float := AFloat;
Pos := APos;
end;
constructor TToken.CreateLit(const AString: string; APos: Integer);
begin
Kind := tkStringLiteral;
Literal.&String := AString;
Pos := APos;
end;
constructor TToken.CreateNull(APos: Integer);
begin
Kind := tkImplicitNull;
Pos := APos;
end;
constructor TToken.CreateBracket(ABracket: TBracket; AClosing, AApplying: Boolean;
APos: Integer; APartner: Integer);
begin
Kind := tkBracket;
Bracket.ID := ABracket;
Bracket.Closing := AClosing;
Bracket.Applying := AApplying;
Bracket.Partner := APartner;
Pos := APos;
end;
constructor TToken.CreateComma(APos: Integer);
begin
Kind := tkComma;
Pos := APos;
end;
constructor TToken.CreateIdent(const AName: string; APos: Integer);
begin
Kind := tkIdentifier;
Identifier := AName;
Pos := APos;
end;
function TToken.ToString: string;
begin
case Kind of
tkIntegerLiteral:
Result := 'Integer literal ' + Literal.Integer.ToString;
tkFloatLiteral:
Result := 'Float literal ' + Literal.Float.ToString(DefaultFormatSettings);
tkStringLiteral:
Result := 'String literal "' + Literal.&String + '"';
tkOperator:
Result := Capitalize(&Operator.Kind.ToString) + ' operator ' + &Operator.Symbol;
tkBracket:
Result := 'Bracket ' + Brackets[Bracket.ID, Bracket.Closing] + IfThen(Bracket.Applying, ' applying on left value') + ' – partner at idx ' + Bracket.Partner.ToString;
tkComma:
Result := 'Comma';
tkIdentifier:
Result := 'Identifier "' + Identifier + '"';
tkImplicitNull:
Result := 'Implicit null';
else
Result := 'Unknown token';
end;
end;
function TToken.IsLeftBracket: Boolean;
begin
Result := (Kind = tkBracket) and Bracket.Opening;
end;
function TToken.IsLeftOpBracket: Boolean;
begin
Result := IsLeftBracket and (Bracket.ID in [bInterval, bCurly, bVector, bFloor, bCeil])
end;
function TToken.IsLeftNoOpBracket: Boolean;
begin
Result := IsLeftBracket and (Bracket.ID in [bRound, bSquare]);
end;
function TToken.IsRightBracket: Boolean;
begin
Result := (Kind = tkBracket) and Bracket.Closing;
end;
function TToken.IsLiteral: Boolean;
begin
Result := Kind in [tkIntegerLiteral, tkFloatLiteral, tkStringLiteral];
end;
function TToken.IsLiteralOrIdent: Boolean;
begin
Result := IsLiteral or (Kind = tkIdentifier);
end;
function TToken.IsValueStart: Boolean;
begin
Result := IsLiteralOrIdent or (IsLeftBracket and not BracketApplies[Bracket.ID]) or
((Kind = tkOperator) and (&Operator.Kind = okPrefix));
end;
function TToken.IsValueEnd: Boolean;
begin
Result := IsLiteralOrIdent or IsRightBracket or
((Kind = tkOperator) and (&Operator.Kind = okPostfix));
end;
function TToken.Precedence: TPrecGroup;
begin
case Kind of
tkOperator:
Result := &Operator.Precedence;
tkBracket:
Result := Bracket.Precedence;
tkComma:
Result := pg(80);
else
Result := pg(0);
end;
end;
class constructor TTokenizer.ClassCreate;
begin
FInvFS := TFormatSettings.Invariant;
FOpDict := TDictionary<Char, TOperator>.Create;
with FOpDict do
begin
Add('+', oPlus);
Add('-', oMinus);
Add('−', oMinus);
Add('*', oAsterisk);
Add('/', oSolidus);
Add('⁄', oSolidus);
Add('∕', oSolidus);
Add('^', oCircumflex);
Add('⋅', oCDot);
Add('∘', oCRing);
Add('√', oSqrt);
Add('×', oCross);
Add('!', oExclamation);
Add('?', oQuestion);
Add('%', oPercent);
Add('‰', oPermille);
Add('°', oDegrees);
Add('.', oPeriod);
Add('|', oBar);
Add('&', oAmp);
Add('@', oAt);
Add(':', oColon);
Add(';', oSemicolon);
Add('<', oLessThan);
Add('≤', oLessEqual);
Add('>', oGreaterThan);
Add('≥', oGreaterEqual);
Add('=', oEquals);
Add('≠', oNotEquals);
Add('≈', oApprox);
Add('∧', oAnd);
Add('∨', oOr);
Add('⊻', oXor);
Add('⊼', oNand);
Add('⊽', oNor);
Add('¬', oNot);
Add('∪', oUnion);
Add('∩', oIntersection);
Add('∖', oSetDifference);
Add('∆', oSymDiff);
Add('∁', oComplement);
Add('∈', oIn);
Add('∋', oNi);
Add('∉', oNotIn);
Add('∌', oNotNi);
Add('⊂', oSubset);
Add('⊆', oSubsetEq);
Add('⊊', oSubsetNEq);
Add('⊃', oSuperset);
Add('⊇', oSupersetEq);
Add('⊋', oSupersetNEq);
Add('∣', oDivides);
Add('∤', oNDivides);
Add('∥', oParallel);
Add('∦', oNParallel);
Add('⟂', oOrthogonal);
Add('⊕', oCircledPlus);
Add('⊗', oCircledTimes);
Add('⊙', oCircledDot);
Add('⇒', oImpliesRight);
Add('⇐', oImpliesLeft);
Add('⇔', oEquivalent);
Add('→', oTo);
Add('↦', oMapsTo);
Add('≔', oAssign);
Add('~', oTilde);
Add('∼', oTilde);
Add('#', oNumberSignPostfix);
Add('§', oSection);
Add('\', oBackslash);
end;
end;
class destructor TTokenizer.ClassDestroy;
begin
FreeAndNil(FOpDict);
end;
class function TTokenizer.TryChrToOp(AChar: Char; AExprExpected: Boolean;
out AOperator: TOperator): Boolean;
begin
Result := FOpDict.TryGetValue(AChar, AOperator);
if AExprExpected then
case AOperator of
oPlus:
AOperator := oLooseUnaryPlus;
oMinus:
AOperator := oLooseUnaryMinus;
oNumberSignPostfix:
AOperator := oNumberSignPrefix;
end;
end;
class function TTokenizer.IsOperator(AChar: Char): Boolean;
begin
Result := FOpDict.ContainsKey(AChar);
end;
class function TTokenizer.TryChrToBracket(AChar: Char; out ABracket: TBracket;
out AClosing: Boolean): Boolean;
var
LBracket: TBracket;
LClosing: Boolean;
begin
for LBracket := Low(TBracket) to High(TBracket) do
for LClosing := Low(Boolean) to High(Boolean) do
if AChar = Brackets[LBracket, LClosing] then
begin
ABracket := LBracket;
AClosing := LClosing;
Exit(True);
end;
Result := False;
end;
class function TTokenizer.Tokenize(AExpression: string): TObjectList<TToken>;
const
DecDigits = ['0'..'9'];
ASCIINumLitChrs = ['0'..'9', 'A'..'Z', 'a'..'z', '.', '+', '-', '#'];
AllDigits = ['0'..'9', 'A'..'Z', 'a'..'z'];
Quote = '"';
Period = '.';
Comma = ',';
MinusSign = #$2212;
NumberSign = '#';
type
TBracketRec = record
Bracket: TBracket;
Pos: Integer;
Idx: Integer;
end;
function br(ABracket: TBracket; APos, AIdx: Integer): TBracketRec; inline;
begin
Result.Bracket := ABracket;
Result.Pos := APos;
Result.Idx := AIdx;
end;
var
i: Integer;
InIdent: Boolean;
InNumber: Boolean;
InString: Boolean;
TokenStart: Integer;
C, PrevC: Char;
S: string;
SLen: Integer;
Op: TOperator;
BracketStack: TStack<TBracketRec>;
TopBracket: TBracketRec;
Bracket: TBracket;
Closing: Boolean;
NumberSignPos: Integer;
ExpPos: Integer;
DecimalSepPos: Integer;
PrevOp: TOperator;
PrevBracket: TBracket;
PrevClosing: Boolean;
ThisBracket: TToken;
InstanceKind: TOpKind;
Applying: Boolean;
function BracketStackString: string;
var
i: Integer;
BracketRec: TBracketRec;
begin
SetLength(Result, BracketStack.Count);
i := 1;
for BracketRec in BracketStack do
begin
Result[i] := Brackets[BracketRec.Bracket, False];
Inc(i);
end;
end;
function TokenText(AEnd: Integer): string;
begin
Result := Copy(AExpression, TokenStart, AEnd - TokenStart + 1);
end;
function TokenTextTrunc(AEnd: Integer; AMaxLen: Integer = 64): string;
const
Ellipsis = '...';
begin
Result := Copy(AExpression, TokenStart, Min(AEnd - TokenStart + 1, AMaxLen));
if AEnd - TokenStart + 1 > AMaxLen then
Result := Result + Ellipsis;
end;
procedure CheckAdjLit;
begin
if
(Result.Count >=2)
and
Result.Last.IsValueStart
and
Result[Result.Count - 2].IsValueEnd
then
raise ESyntaxException.CreateFmt(SAdjValues, [Result.Last.Pos]);
end;
procedure IdentEndingAt(AEnd: Integer);
begin
Result.Add(
TToken.CreateIdent(TokenText(AEnd), TokenStart)
);
CheckAdjLit;
end;
procedure NumberEndingAt(AEnd: Integer);
procedure InvNumLit;
begin
raise ESyntaxException.CreateFmt(SInvalidNumLit,
[TokenTextTrunc(AEnd), TokenStart]);
end;
var
IntVal: Int64;
FltVal: Extended;
Base: Integer;
j: Integer;
C: Char;
Digit: Integer;
const
MaxBase = 10 + Ord('Z') - Ord('A') + 1;
begin
if NumberSignPos = -1 then
InvNumLit;
if DecimalSepPos = -1 then
InvNumLit;
if NumberSignPos = 0 then
begin
if ExpPos = -1 then
InvNumLit;
if (DecimalSepPos = 0) and (ExpPos = 0) and TryStrToInt64(TokenText(AEnd), IntVal) then
Result.Add(TToken.CreateLit(IntVal, TokenStart))
else if TryStrToFloat(TokenText(AEnd), FltVal, FInvFS) then
Result.Add(TToken.CreateLit(FltVal, TokenStart))
else
InvNumLit;
end
else
begin
if DecimalSepPos <> 0 then
InvNumLit;
if not TryStrToInt(TokenText(NumberSignPos - 1), Base) then
InvNumLit;
if not InRange(Base, 2, MaxBase) then
raise ESyntaxException.CreateFmt(SInvNumBase,
[Base, TokenTextTrunc(AEnd), TokenStart]);
IntVal := 0;
try
for j := NumberSignPos + 1 to AEnd do
begin
C := AExpression[j];
Digit := 0;
if InRange(Ord(C), Ord('0'), Ord('9')) then
Digit := Ord(C) - Ord('0')
else if InRange(Ord(C), Ord('A'), Ord('Z')) then
Digit := 10 + Ord(C) - Ord('A')
else if InRange(Ord(C), Ord('a'), Ord('z')) then
Digit := 10 + Ord(C) - Ord('a')
else
InvNumLit;
if Digit >= Base then
raise ESyntaxException.CreateFmt(SInvDigit,
[Base, C, TokenTextTrunc(AEnd), TokenStart]);
{$IFOPT Q-}
{$DEFINE OverflowCheckingWasDisabled}
{$Q+}
{$ELSE}
{$UNDEF OverflowCheckingWasDisabled}
{$ENDIF}
Inc(IntVal, Digit);
if j < AEnd then
IntVal := IntVal * Base;
{$IFDEF OverflowCheckingWasDisabled}
{$Q-}
{$UNDEF OverflowCheckingWasDisabled}
{$ENDIF}
end;
except
on EIntOverflow do
InvNumLit;
end;
Result.Add(TToken.CreateLit(IntVal, TokenStart));
end;
CheckAdjLit;
end;
function IsNumLitChr(const C: Char): Boolean; inline;
begin
Result := CharInSet(C, ASCIINumLitChrs) or (C = MinusSign);
end;
function IsSyntaxElement(const C: Char): Boolean; inline;
var
DummyBracket: TBracket;
DummyClosing: Boolean;
begin
Result := TryChrToBracket(C, DummyBracket, DummyClosing) or (C = Comma) or
(C = Quote) or IsOperator(C);
end;
procedure PartialEnd;
begin
if (Result.Count > 0) and (Result.Last.Kind = tkOperator) and
(Result.Last.&Operator.Kind <> okPostfix)
then
begin
if Result.Last.&Operator.&Function = FCN_Do then
Result.Add(TToken.CreateNull(i))
else
raise ESyntaxException.CreateFmt(SNoRightOperand,
[Result.Last.&Operator.Symbol, Result.Last.Pos]);
end;
if (Result.Count > 0) and (Result.Last.Kind = tkComma) then
raise ESyntaxException.CreateFmt(SNothingAfterComma,
[Result.Last.Pos]);
end;
var
TokenList: TObjectList<TToken> absolute Result;
function ExprExpected: Boolean;
begin
Result :=
(TokenList.Count = 0)
or
TokenList.Last.IsLeftBracket
or
(TokenList.Last.Kind = tkComma)
or
((TokenList.Last.Kind = tkOperator) and (TokenList.Last.&Operator.Kind <> okPostfix))
end;
function PeekChar: Char;
begin
if i = AExpression.Length then
Result := #0
else
Result := AExpression[i + 1];
end;
begin
Result := TObjectList<TToken>.Create;
try
BracketStack := TStack<TBracketRec>.Create;
try
TokenStart := 0;
NumberSignPos := 0;
ExpPos := 0;
DecimalSepPos := 0;
InIdent := False;
InNumber := False;
InString := False;
PrevC := #0;
SLen := 0;
i := 1;
while i <= AExpression.Length do
begin
C := AExpression[i];
if InString then
begin
if C = Quote then
begin
if (i = AExpression.Length) or (AExpression[i + 1] <> Quote) then
begin
InString := False;
SetLength(S, SLen);
Result.Add(TToken.CreateLit(S, TokenStart));
CheckAdjLit;
end
else
begin
if S.Length = SLen then
SetLength(S, 2 * SLen + 1);
Inc(SLen);
S[SLen] := Quote;
Inc(i);
end;
end
else
begin
if S.Length = SLen then
SetLength(S, 2 * SLen + 1);
Inc(SLen);
S[SLen] := C;
end;
end
else if InNumber then
begin
if
not IsNumLitChr(C)
or
(
C.IsInArray(['+', '-', MinusSign])
and
not ((PrevC.IsInArray(['e', 'E'])) and (NumberSignPos = 0))
)
or
(
(C = NumberSign)
and
not CharInSet(PeekChar, AllDigits)
)
then
begin
InNumber := False;
NumberEndingAt(i - 1);
Continue;
end
else
begin
case C of
NumberSign:
begin
if NumberSignPos = 0 then
NumberSignPos := i
else
NumberSignPos := -1;
end;
'E', 'e':
begin
if ExpPos = 0 then
ExpPos := i
else
ExpPos := -1;
end;
Period:
begin
if DecimalSepPos = 0 then
DecimalSepPos := i
else
DecimalSepPos := -1;
end;
MinusSign:
AExpression[i] := '-';
end;
end;
end
else if InIdent then
begin
if C.IsWhiteSpace or IsSyntaxElement(C) then
begin
InIdent := False;
IdentEndingAt(i - 1);
Continue;
end;
end
else if TryChrToBracket(C, Bracket, Closing) then
begin
if Closing then
PartialEnd;
Applying := not Closing and (Result.Count > 0) and Result.Last.IsValueEnd;
if Applying and (Result.Last.Kind = tkOperator) and (Result.Last.&Operator.Kind = okPostfix) then
raise ESyntaxException.CreateFmt(SAdjValues, [i]);
Result.Add(
TToken.CreateBracket(Bracket, Closing, Applying, i)
);
if Closing then
begin
if BracketStack.Count = 0 then
raise ESyntaxException.CreateFmt(SNegativeBracketLevel, [i]);
TopBracket := BracketStack.Pop;
if (Bracket = bSquare) and (TopBracket.Bracket = bInterval) then
Bracket := bInterval;
if TopBracket.Bracket <> Bracket then
raise ESyntaxException.CreateFmt(SWrongBracket,
[Brackets[Bracket, True], i, Brackets[TopBracket.Bracket, False],
TopBracket.Pos]);
Result[TopBracket.Idx].Bracket.Partner := Result.Count - 1;
Result.Last.Bracket.Partner := TopBracket.Idx;
if (Result.Count >= 2) and Result[Result.Count - 2].IsLeftBracket then
begin
ThisBracket := Result[Result.Count - 2];
if not BracketEmpty[ThisBracket.Bracket.ID, ThisBracket.Bracket.Applying] then
raise ESyntaxException.CreateFmt(SEmptyBracket, [ThisBracket.Pos]);
end;
end
else
begin
BracketStack.Push(br(Bracket, i, Result.Count - 1));
CheckAdjLit;
end;
end
else if C = Comma then
begin
if (Result.Count = 0) or Result.Last.IsLeftBracket then
raise ESyntaxException.CreateFmt(SNothingBeforeComma, [i]);
PartialEnd;
Result.Add(TToken.CreateComma(i));
if BracketStack.Count > 0 then
begin
TopBracket := BracketStack.Peek;
if (TopBracket.Bracket = bSquare) and not Result[TopBracket.Idx].Bracket.Applying then
Result[TopBracket.Idx].Bracket.ID := bInterval;
end;
end
else if C = Quote then
begin
SetLength(S, 1024);
SLen := 0;
InString := True;
TokenStart := i;
end
else if CharInSet(C, DecDigits) or ((C = Period) and ExprExpected) then
begin
InNumber := True;
TokenStart := i;
NumberSignPos := 0;
ExpPos := 0;
DecimalSepPos := 0;
end
else if TryChrToOp(C, ExprExpected, Op) then
begin
if (Op.Kind <> okPrefix) and ExprExpected then
raise ESyntaxException.CreateFmt(SNoLeftOperand, [Op.Symbol, i]);
if
(Op in [oLooseUnaryPlus, oLooseUnaryMinus])
and
(Result.Count > 0)
and
(Result.Last.Kind = tkOperator)
and
(
(Result.Last.&Operator = oCircumflex)
or
(Result.Last.&Operator.Kind = okPrefix)
)
then
Op := Pred(Op);
if BracketStack.Count = 0 then
Result.Add(TToken.CreateOp(Op, i, -1))
else
Result.Add(TToken.CreateOp(Op, i, BracketStack.Peek.Idx));
CheckAdjLit;
end
else if not C.IsWhiteSpace then
begin
InIdent := True;
TokenStart := i;
end;
Inc(i);
PrevC := C;
end;
if InString then
begin
Inc(TokenStart);
raise ESyntaxException.CreateFmt(SUnterminatedStringLiteral,
[TokenTextTrunc(AExpression.Length), TokenStart - 1]);
end;
if BracketStack.Count <> 0 then
raise ESyntaxException.CreateFmt(SOpenBracketAtEOE, [BracketStackString]);
if InNumber then
NumberEndingAt(AExpression.Length);
if InIdent then
IdentEndingAt(AExpression.Length);
PartialEnd;
finally
BracketStack.Free;
end;
except
Result.Free;
raise;
end;
end;
function TToken.TBracketRec.Opening: Boolean;
begin
Result := not Closing;
end;
function TOpAssocHelper.ToString: string;
begin
case Self of
osLeft:
Result := 'left';
osRight:
Result := 'right';
else
Result := '';
end;
end;
end.