unit ASStrFcns;
{$WARN SYMBOL_PLATFORM OFF}
{$WARN DUPLICATE_CTOR_DTOR OFF}
interface
uses
SysUtils, Classes, Generics.Defaults, Generics.Collections;
type
TStringSearchOption = (ssoIgnoreCase, ssoWholeWords);
TStringSearchOptions = set of TStringSearchOption;
function SubstringCount(const ASubstr, AStr: string;
AStringSearchOptions: TStringSearchOptions = [];
AOffset: Integer = 0;
AList: TList<Integer> = nil): Integer;
function SubstringFirstIndex(const ASubstr, AStr: string;
AStringSearchOptions: TStringSearchOptions = [];
AOffset: Integer = 0): Integer;
function StringReplace(const AStr, APattern, ANewText: string;
AStringSearchOptions: TStringSearchOptions = [];
AOffset: Integer = 0): string;
type
TChrTransformFunc = reference to function(C: Char): Char;
TTextTransformFunc = reference to function(const AText: string): string;
function TextTransformFunc(AChrTransformFunc: TChrTransformFunc): TTextTransformFunc;
function ChrUpperCase(C: Char): Char;
function ChrLowerCase(C: Char): Char;
function ChrInvertCase(C: Char): Char;
function TxtTitleCase(const AText: string): string;
function TxtSentenceCase(const AText: string): string;
function ChrROT13(C: Char): Char;
function ChrCaesar(N: Integer): TChrTransformFunc;
function TxtVigenère(const AKey: string; ADecode: Boolean = False): TTextTransformFunc;
type
TWordExtractionOptions = record
WordSeps: string;
MathSeps: Boolean;
TrimChrs: string;
TrimPunct: Boolean;
LetterRequired: Boolean;
LetterOrDigitRequired: Boolean;
end;
const
WEO_ENGLISH: TWordExtractionOptions =
(
WordSeps: '/<>='#$2014;
MathSeps: False;
TrimChrs: '';
TrimPunct: True;
LetterRequired: True;
LetterOrDigitRequired: False;
);
WEO_ENGLISH_MATH: TWordExtractionOptions =
(
WordSeps: '/<>='#$2014'()[]{}';
MathSeps: True;
TrimChrs: '';
TrimPunct: True;
LetterRequired: True;
LetterOrDigitRequired: False;
);
WEO_ENGLISH_SOURCECODE: TWordExtractionOptions =
(
WordSeps: '/<>='#$2014'.,;:-+*()[]{}|\%&@$~!?^';
MathSeps: True;
TrimChrs: '';
TrimPunct: True;
LetterRequired: True;
LetterOrDigitRequired: False;
);
procedure GetWords(const AText: string; const AOptions: TWordExtractionOptions;
AWordList: TList<string>); overload;
function GetWords(const AText: string; const AOptions: TWordExtractionOptions): TArray<string>; overload;
function WordWrap(const AText: string; N: Integer = 80): string;
function Pad(const AString: string; ALength: Integer;
AAlignment: TAlignment; APadding: Char = #32): string;
function RemoveDuplicates(const AString: string): string;
function RemoveAdjacentDuplicates(const AString: string): string;
implementation
uses
Math, StrUtils, Character;
function SubstringScan(const ASubstr, AStr: string;
AStringSearchOptions: TStringSearchOptions = [];
AOnlyFirst: Boolean = False;
AOffset: Integer = 0;
AList: TList<Integer> = nil): Integer;
var
Substr, Str: string;
LastIndex, i, j: Integer;
function TestWholeWord: Boolean;
begin
Result :=
not (ssoWholeWords in AStringSearchOptions) or
(
((i = 1) or not Str[i - 1].IsLetterOrDigit) and
((i + j = AStr.Length + 1) or not (Str[i + j]).IsLetterOrDigit)
)
end;
begin
if Assigned(AList) then
AList.Clear;
Result := 0;
if ASubstr.IsEmpty then Exit;
LastIndex := AStr.Length - ASubstr.Length + 1;
if ssoIgnoreCase in AStringSearchOptions then
begin
Substr := AnsiLowerCase(ASubstr);
Str := AnsiLowerCase(AStr);
end
else
begin
Substr := ASubstr;
Str := AStr;
end;
i := 1 + AOffset;
while i <= LastIndex do
begin
j := 0;
while (j < Substr.Length) and (Str[i + j] = Substr[1 + j]) do Inc(j);
if (j = Substr.Length) and TestWholeWord then
begin
if AOnlyFirst then
Exit(i);
if Assigned(AList) then
AList.Add(i);
Inc(Result);
Inc(i, j);
end
else
Inc(i);
end;
end;
function SubstringCount(const ASubstr, AStr: string;
AStringSearchOptions: TStringSearchOptions = [];
AOffset: Integer = 0;
AList: TList<Integer> = nil): Integer;
begin
Result := SubstringScan(ASubstr, AStr, AStringSearchOptions, False,
AOffset, AList);
end;
function SubstringFirstIndex(const ASubstr, AStr: string;
AStringSearchOptions: TStringSearchOptions = [];
AOffset: Integer = 0): Integer;
begin
Result := SubstringScan(ASubstr, AStr, AStringSearchOptions, True, AOffset)
end;
function StringReplace(const AStr, APattern, ANewText: string;
AStringSearchOptions: TStringSearchOptions = [];
AOffset: Integer = 0): string;
var
indices: TList<Integer>;
diff, chrcnt: Integer;
i, y: Integer;
begin
indices := TList<Integer>.Create;
try
if SubstringCount(APattern, AStr, AStringSearchOptions, AOffset, indices) = 0 then
Exit(AStr);
diff := ANewText.Length - APattern.Length;
SetLength(Result, AStr.Length + diff * indices.Count);
y := 1;
if indices[0] > 1 then
Move(AStr[1], Result[y], (indices[0] - 1) * sizeof(Char));
y := indices[0];
if not ANewText.IsEmpty then
Move(ANewText[1], Result[y], ANewText.Length * sizeof(Char));
Inc(y, ANewText.Length);
for i := 0 to indices.Count - 2 do
begin
chrcnt := (indices[i + 1] - indices[i] - APattern.Length);
if chrcnt > 0 then
Move(AStr[indices[i] + APattern.Length], Result[y], chrcnt * sizeof(Char));
Inc(y, chrcnt);
if not ANewText.IsEmpty then
Move(ANewText[1], Result[y], ANewText.Length * sizeof(Char));
Inc(y, Length(ANewText));
end;
chrcnt := AStr.Length - (indices[indices.Count - 1] + APattern.Length) + 1;
if chrcnt > 0 then
Move(AStr[indices[indices.Count - 1] + APattern.Length], Result[y],
chrcnt * sizeof(Char));
finally
indices.Free;
end;
end;
function TextTransformFunc(AChrTransformFunc: TChrTransformFunc): TTextTransformFunc;
begin
Result := function(const AText: string): string
var
i: Integer;
begin
SetLength(Result, AText.Length);
for i := 1 to AText.Length do
Result[i] := AChrTransformFunc(AText[i]);
end;
end;
function imod(const x: Integer; const y: Integer): Integer;
begin
if x >= 0 then
imod := x - floor(x/y) * y
else
imod := x + ceil(-x/y) * y;
end;
function ChrUpperCase(C: Char): Char;
begin
Result := AnsiUpperCase(C)[1];
end;
function ChrLowerCase(C: Char): Char;
begin
Result := AnsiLowerCase(C)[1];
end;
function ChrInvertCase(C: Char): Char;
begin
if C.IsUpper then
Result := AnsiLowerCase(C)[1]
else
Result := AnsiUpperCase(C)[1];
end;
function TxtTitleCase(const AText: string): string;
var
i: Integer;
StartOfWord: Boolean;
begin
StartOfWord := True;
Result := AText;
for i := 1 to AText.Length do
begin
if StartOfWord and AText[i].IsLetter then
begin
Result[i] := AnsiUpperCase(AText[i])[1];
StartOfWord := False;
end
else if AText[i].IsWhiteSpace then
StartOfWord := True;
end;
end;
function TxtSentenceCase(const AText: string): string;
var
StartOfSentence: Boolean;
i: Integer;
begin
StartOfSentence := True;
Result := AText;
for i := 1 to AText.Length do
if StartOfSentence and AText[i].IsLetter then
begin
Result[i] := AnsiUpperCase(AText[i])[1];
StartOfSentence := False;
end
else if CharInSet(AText[i], ['.', '!', '?']) or (AText[i] = '‽') then
StartOfSentence := True;
end;
function ChrROT13(C: Char): Char;
begin
if InRange(ord(C), ord('A'), ord('Z')) then
Result := Chr(ord('A') + (ord(C) - ord('A') + 13) mod 26)
else if InRange(ord(C), ord('a'), ord('z')) then
Result := Chr(ord('a') + (ord(C) - ord('a') + 13) mod 26)
else
Result := C;
end;
function ChrCaesar(N: Integer): TChrTransformFunc;
begin
Result := function(C: Char): Char
begin
if InRange(ord(C), ord('A'), ord('Z')) then
Result := Chr(ord('A') + imod((ord(C) - ord('A') + N), 26))
else if InRange(ord(C), ord('a'), ord('z')) then
Result := Chr(ord('a') + imod((ord(C) - ord('a') + N), 26))
else
Result := C;
end;
end;
function TxtVigenère(const AKey: string; ADecode: Boolean = False): TTextTransformFunc;
var
n: Integer;
KeyChrs: array of Byte;
i: Integer;
factor: Integer;
begin
n := AKey.Length;
SetLength(KeyChrs, n);
for i := 1 to n do
if InRange(ord(AKey[i]), ord('A'), ord('Z')) then
KeyChrs[i - 1] := ord(AKey[i]) - ord('A')
else
raise Exception.Create('Invalid character in Vigenère key.');
if n = 0 then
raise Exception.Create('Vigenère key is empty.');
factor := IfThen(ADecode, -1, 1);
Result := function(const AText: string): string
var
j: Integer;
begin
SetLength(Result, AText.Length);
for j := 1 to AText.Length do
begin
if InRange(ord(AText[j]), ord('A'), ord('Z')) then
Result[j] := Chr(ord('A') + imod(ord(AText[j]) - ord('A') + factor * KeyChrs[(j - 1) mod n], 26))
else if InRange(ord(AText[j]), ord('a'), ord('z')) then
Result[j] := Chr(ord('a') + imod(ord(AText[j]) - ord('a') + factor * KeyChrs[(j - 1) mod n], 26))
else
Result[j] := AText[j];
end;
end;
end;
function ContainsChar(const AText: string; AChr: Char): Boolean;
var
i: Integer;
begin
for i := 1 to AText.Length do
if AText[i] = AChr then
Exit(True);
Result := False;
end;
procedure GetWords(const AText: string; const AOptions: TWordExtractionOptions;
AWordList: TList<string>);
function IsWordSep(AChr: Char): Boolean;
begin
Result := AChr.IsWhiteSpace or
(AOptions.MathSeps and (AChr.GetUnicodeCategory = TUnicodeCategory.ucMathSymbol)) or
ContainsChar(AOptions.WordSeps, AChr);
end;
function IsTrimChar(AChr: Char): Boolean;
begin
Result := (AOptions.TrimPunct and AChr.IsPunctuation) or
ContainsChar(AOptions.TrimChrs, AChr);
end;
procedure ConsiderCandidate(AStart, AEnd: Integer);
var
HasReqChr: Boolean;
j: Integer;
begin
{$IFDEF DEBUG}
Assert(InRange(AStart, 1, AText.Length));
Assert(InRange(AEnd, 1, AText.Length));
Assert(AStart <= AEnd);
{$ENDIF}
while (AStart <= AEnd) and IsTrimChar(AText[AStart]) do
Inc(AStart);
while (AEnd >= AStart) and IsTrimChar(AText[AEnd]) do
Dec(AEnd);
if AStart > AEnd then
Exit;
if AOptions.LetterRequired then
begin
HasReqChr := False;
for j := AStart to AEnd do
if AText[j].IsLetter then
begin
HasReqChr := True;
Break;
end;
end
else if AOptions.LetterOrDigitRequired then
begin
HasReqChr := False;
for j := AStart to AEnd do
if AText[j].IsLetterOrDigit then
begin
HasReqChr := True;
Break;
end;
end
else
HasReqChr := True;
if not HasReqChr then
Exit;
AWordList.Add(Copy(AText, AStart, AEnd - AStart + 1));
end;
var
InProposedWord: Boolean;
Start: Integer;
i: Integer;
begin
if AWordList = nil then
raise Exception.Create('GetWords: word list is nil.');
InProposedWord := False;
Start := 1;
for i := 1 to AText.Length do
if InProposedWord then
begin
if IsWordSep(AText[i]) then
begin
InProposedWord := False;
ConsiderCandidate(Start, i - 1);
end;
end
else
begin
if not IsWordSep(AText[i]) then
begin
Start := i;
InProposedWord := True;
end;
end;
if InProposedWord then
ConsiderCandidate(Start, AText.Length);
end;
function GetWords(const AText: string; const AOptions: TWordExtractionOptions): TArray<string>;
var
List: TList<string>;
begin
List := TList<string>.Create;
try
GetWords(AText, AOptions, List);
Result := List.ToArray;
finally
List.Free;
end;
end;
function WordWrap(const AText: string; N: Integer = 80): string;
function IndexChr(const AChr: Char; AValues: array of Char): Integer;
var
i: Integer;
begin
for i := 0 to High(AValues) do
if AChr = AValues[i] then
Exit(i);
Result := -1;
end;
function ChrAnyOf(const AChr: Char; AValues: array of Char): Boolean;
begin
Result := IndexChr(AChr, AValues) <> -1;
end;
const
NBSP = #$A0;
NBHYP = #$2011;
function IsNbrChr(const AChr: Char): Boolean; inline;
begin
Result := ChrAnyOf(AChr, [NBSP, NBHYP]);
end;
function IsWordChar(const AChr: Char): Boolean; inline;
begin
Result := AChr.IsLetterOrDigit or (IndexChr(AChr, ['''', '’']) <> -1);
end;
function CanBreakAfter(AIndex: Integer): Boolean;
begin
if AIndex >= AText.Length then
Exit(True);
if AIndex < 1 then
Exit(True);
if IsWordChar(AText[AIndex]) and (IsWordChar(AText[AIndex + 1]) or (IndexChr(AText[AIndex + 1], ['.', ',', ':', ';', '!', '?', '-', '‐']) <> -1)) then
Exit(False);
if IsNbrChr(AText[AIndex]) or IsNbrChr(AText[AIndex + 1]) then
Exit(False);
Result := True;
end;
var
SB: TStringBuilder;
p, pp, newpp, p2, c: Integer;
begin
SB := TStringBuilder.Create(
AText.Length + Length(sLineBreak) * (AText.Length div Max(N-5, 1))
);
try
p := 1;
pp := 1;
c := 0;
while p <= AText.Length do
begin
if (AText[p] = #13) or (AText[p] = #10) then
begin
p2 := p - 1;
while (p2 >= pp) and AText[p2].IsWhiteSpace do
Dec(p2);
if p2 >= pp then
SB.Append(AText, pp - 1, p2 - pp + 1);
SB.AppendLine;
if (AText[p] = #13) and (p < AText.Length) and (AText[p + 1] = #10) then
Inc(p);
Inc(p);
pp := p;
c := 0;
Continue;
end;
if c = N then
begin
p2 := p - 1;
Assert(p2 >= pp);
while (p2 >= pp) and not CanBreakAfter(p2) do
Dec(p2);
if p2 < pp then
p2 := p - 1;
newpp := p2 + 1;
if AText[newpp].IsWhiteSpace and (newpp < AText.Length) and not AText[newpp + 1].IsWhiteSpace then
Inc(newpp);
while (p2 >= pp) and AText[p2].IsWhiteSpace do
Dec(p2);
if p2 >= pp then
SB.Append(AText, pp - 1, p2 - pp + 1);
SB.AppendLine;
pp := newpp;
p := pp;
c := 0;
Continue;
end;
if p = AText.Length then
SB.Append(AText, pp - 1, p - pp + 1);
Inc(p);
Inc(c);
end;
Result := SB.ToString;
finally
SB.Free;
end;
end;
function Pad(const AString: string; ALength: Integer; AAlignment: TAlignment;
APadding: Char): string;
var
N, L, R: Integer;
begin
if AString.Length >= ALength then
Exit(AString);
case AAlignment of
taLeftJustify:
Result := AString + StringOfChar(APadding, ALength - AString.Length);
taRightJustify:
Result := StringOfChar(APadding, ALength - AString.Length) + AString;
taCenter:
begin
N := ALength - AString.Length;
L := N div 2;
R := N - L;
Result := StringOfChar(APadding, L) + AString + StringOfChar(APadding, R);
end;
else
Result := AString;
end;
end;
function RemoveDuplicates(const AString: string): string;
var
ActualLength: Integer;
i: Integer;
Chars: TDictionary<Char, Pointer>;
begin
Chars := TDictionary<Char, Pointer>.Create;
try
SetLength(Result, AString.Length);
ActualLength := 0;
for i := 1 to AString.Length do
if not Chars.ContainsKey(AString[i]) then
begin
Inc(ActualLength);
Result[ActualLength] := AString[i];
Chars.Add(AString[i], nil);
end;
SetLength(Result, ActualLength);
finally
Chars.Free;
end;
end;
function RemoveAdjacentDuplicates(const AString: string): string;
var
ActualLength: Integer;
i: Integer;
begin
SetLength(Result, AString.Length);
ActualLength := 0;
for i := 1 to AString.Length do
if (i = 1) or (AString[i] <> AString[Pred(i)]) then
begin
Inc(ActualLength);
Result[ActualLength] := AString[i];
end;
SetLength(Result, ActualLength);
end;
end.