unit UnicodeData;
interface
const
UCDUnicodeVersion = '8';
type
TIntegerArray = array of integer;
TBlockInfo = record
BlockBegin, BlockEnd: integer;
BlockName: string;
end;
TUCD = record
strict private
FBlocks: array of TBlockInfo;
FBMPBlockCount: integer;
procedure NeedBlocks;
function GetBlock(Index: integer): TBlockInfo;
function GetBlockCount: integer;
private
function GetBMPBlockCount: integer;
public
function GetChrCodepointStr(const ACode: integer): string; overload;
function GetChrCodepointStr(const ACode: char): string; overload;
function GetChrName(const ACodepoint: integer): string; overload;
function GetChrName(const ACodepoint: char): string; overload;
function GetChrBlock(const ACodepoint: integer): string; overload;
function GetChrBlock(const ACodepoint: char): string; overload;
function SearchChrNames(const S: string; GetAll: boolean = false): TIntegerArray;
property Blocks[Index: integer]: TBlockInfo read GetBlock;
property BlockCount: integer read GetBlockCount;
property BMPBlockCount: integer read GetBMPBlockCount;
end;
var
UCD: TUCD;
implementation
{$R UCD.res}
uses
SysUtils, Classes, Windows, Math;
const
U8CHRS = 'U8CHRS';
U8BLOCKS = 'U8BLOCKS';
function TUCD.GetChrBlock(const ACodepoint: integer): string;
var
i: Integer;
begin
NeedBlocks;
result := 'No_Block';
for i := 0 to high(FBlocks) do
if InRange(ACodepoint, FBlocks[i].BlockBegin, FBlocks[i].BlockEnd) then
Exit(FBlocks[i].BlockName);
end;
function TUCD.GetChrName(const ACodepoint: integer): string;
var
RS: TResourceStream;
begin
result := '';
RS := TResourceStream.Create(hInstance, U8CHRS, RT_RCDATA);
try
with TStringList.Create do
try
NameValueSeparator := ';';
LoadFromStream(RS);
result := Values[IntToHex(ACodepoint, 4)];
finally
Free;
end;
finally
RS.Free;
end;
end;
function TUCD.GetBlock(Index: integer): TBlockInfo;
begin
NeedBlocks;
result := FBlocks[Index];
end;
function TUCD.GetBlockCount: integer;
begin
NeedBlocks;
result := Length(FBlocks);
end;
function TUCD.GetBMPBlockCount: integer;
begin
NeedBlocks;
result := FBMPBlockCount;
end;
function TUCD.GetChrBlock(const ACodepoint: char): string;
begin
result := GetChrBlock(ord(ACodepoint));
end;
function TUCD.GetChrCodepointStr(const ACode: char): string;
begin
result := GetChrCodepointStr(ord(ACode));
end;
function TUCD.GetChrCodepointStr(const ACode: integer): string;
begin
result := 'U+' + IntToHex(ACode, 4);
end;
function TUCD.GetChrName(const ACodepoint: char): string;
begin
result := GetChrName(ord(ACodepoint));
end;
procedure TUCD.NeedBlocks;
var
RS: TResourceStream;
BlockBegin, BlockEnd: integer;
i, p, p2: integer;
CurrentLine: string;
begin
if Length(FBlocks) > 0 then
Exit;
FBMPBlockCount := 0;
RS := TResourceStream.Create(hInstance, U8BLOCKS, RT_RCDATA);
try
with TStringList.Create do
try
LoadFromStream(RS);
SetLength(FBlocks, Count);
for i := 0 to Count - 1 do
begin
CurrentLine := Strings[i];
p := Pos('..', CurrentLine);
p2 := Pos('; ', CurrentLine);
if (p = 0) or (p2 = 0) or
(not TryStrToInt('$' + Copy(CurrentLine, 1, p - 1), BlockBegin)) or
(not TryStrToInt('$' + Copy(CurrentLine, p + 2, p2 - p - 2), BlockEnd)) then
raise Exception.Create('Invalid UCBLOCKS database.');
FBlocks[i].BlockBegin := BlockBegin;
FBlocks[i].BlockEnd := BlockEnd;
FBlocks[i].BlockName := Copy(CurrentLine, p2 + 2);
if (FBMPBlockCount = 0) and (FBlocks[i].BlockBegin > $FFFF) then
FBMPBlockCount := i;
end;
finally
Free;
end;
finally
RS.Free;
end;
end;
function TUCD.SearchChrNames(const S: string; GetAll: boolean): TIntegerArray;
const
ALLOC_BY = 128;
var
ActualLength: integer;
i: integer;
procedure AddCodepoint(const C: integer);
begin
if ActualLength = Length(result) then
SetLength(result, Length(result) + ALLOC_BY);
result[ActualLength] := C;
inc(ActualLength);
end;
var
RS: TResourceStream;
SCAP: string;
cp: integer;
begin
ActualLength := 0;
SetLength(result, 0);
SCAP := UpperCase(S);
RS := TResourceStream.Create(hInstance, U8CHRS, RT_RCDATA);
try
with TStringList.Create do
try
NameValueSeparator := ';';
LoadFromStream(RS);
for i := 0 to Count - 1 do
if GetAll or (Pos(SCAP, ValueFromIndex[i]) > 0) then
if TryStrToInt('$' + Names[i], cp) then
AddCodepoint(cp);
finally
Free;
end;
finally
RS.Free;
end;
SetLength(result, ActualLength);
end;
end.