UnicodeData.pas

File name
C:\Users\Andreas Rejbrand\Documents\Dev\UnicodeInfo\UnicodeData.pas
Date exported
Time exported
Formatting processor
TPascalFormattingProcessor
{******************************************************************************}
{                                                                              }
{ Rejbrand Unicode Database Interface Unit                                     }
{                                                                              }
{ Copyright © 2016 Andreas Rejbrand                                            }
{                                                                              }
{ http://english.rejbrand.se/                                                  }
{                                                                              }
{******************************************************************************}

unit UnicodeData;

interface

const
  UCDUnicodeVersion = '8';

type
  TIntegerArray = array of integer;

  TBlockInfo = record
    BlockBegin, BlockEnd: integer;
    BlockName: string;
  end;

  TUCD = record
  strict private
    FBlocks: array of TBlockInfo;
    FBMPBlockCount: integer;
    procedure NeedBlocks;
    function GetBlock(Index: integer): TBlockInfo;
    function GetBlockCount: integer;
  private
    function GetBMPBlockCount: integer;
  public
    function GetChrCodepointStr(const ACode: Integer): string; overload;
    function GetChrCodepointStr(const ACode: Char): string; overload;
    function GetChrName(const ACodepoint: Integer): string; overload;
    function GetChrName(const ACodepoint: Char): string; overload;
    function GetChrBlock(const ACodepoint: Integer): string; overload;
    function GetChrBlock(const ACodepoint: Integer; out ABlockIndex: Integer): string; overload;
    function GetChrBlock(const ACodepoint: Char): string; overload;
    function SearchChrNames(const S: string; GetAll: Boolean = False): TArray<Integer>;
    property Blocks[Index: integer]: TBlockInfo read GetBlock;
    property BlockCount: Integer read GetBlockCount;
    property BMPBlockCount: Integer read GetBMPBlockCount;
  end;

var
  UCD: TUCD;

implementation

{$R UCD.res}

uses
  SysUtils, Classes, Windows, Math, Generics.Collections;

const
  U8CHRS = 'U8CHRS';
  U8BLOCKS = 'U8BLOCKS';

{ TUCD }

function TUCD.GetChrBlock(const ACodepoint: Integer): string;
begin
  NeedBlocks;
  Result := 'No_Block';
  for var i := 0 to high(FBlocks) do
    if InRange(ACodepoint, FBlocks[i].BlockBegin, FBlocks[i].BlockEnd) then
      Exit(FBlocks[i].BlockName);
end;

function TUCD.GetChrName(const ACodepoint: Integer): string;
begin
  Result := '';
  var RS := TResourceStream.Create(hInstance, U8CHRS, RT_RCDATA);
  try
    with TStringList.Create do
      try
        NameValueSeparator := ';';
        LoadFromStream(RS);
        result := Values[IntToHex(ACodepoint, 4)];
      finally
        Free;
      end;
  finally
    RS.Free;
  end;
end;

function TUCD.GetBlock(Index: Integer): TBlockInfo;
begin
  NeedBlocks;
  Result := FBlocks[Index];
end;

function TUCD.GetBlockCount: Integer;
begin
  NeedBlocks;
  Result := Length(FBlocks);
end;

function TUCD.GetBMPBlockCount: Integer;
begin
  NeedBlocks;
  Result := FBMPBlockCount;
end;

function TUCD.GetChrBlock(const ACodepoint: Char): string;
begin
  Result := GetChrBlock(ord(ACodepoint));
end;

function TUCD.GetChrBlock(const ACodepoint: Integer;
  out ABlockIndex: Integer): string;
begin
  NeedBlocks;
  Result := 'No_Block';
  ABlockIndex := -1;
  for var i := 0 to high(FBlocks) do
    if InRange(ACodepoint, FBlocks[i].BlockBegin, FBlocks[i].BlockEnd) then
    begin
      ABlockIndex := i;
      Exit(FBlocks[i].BlockName);
    end;
end;

function TUCD.GetChrCodepointStr(const ACode: Char): string;
begin
  Result := GetChrCodepointStr(ord(ACode));
end;

function TUCD.GetChrCodepointStr(const ACode: Integer): string;
begin
  Result := 'U+' + IntToHex(ACode, 4);
end;

function TUCD.GetChrName(const ACodepoint: Char): string;
begin
  Result := GetChrName(ord(ACodepoint));
end;

procedure TUCD.NeedBlocks;
var
  RS: TResourceStream;
  BlockBegin, BlockEnd: integer;
  i, p, p2: integer;
  CurrentLine: string;
begin
  if Length(FBlocks) > 0 then
    Exit;
  FBMPBlockCount := 0;
  RS := TResourceStream.Create(hInstance, U8BLOCKS, RT_RCDATA);
  try
    with TStringList.Create do
      try
        LoadFromStream(RS);
        SetLength(FBlocks, Count);
        for i := 0 to Count - 1 do
        begin
          CurrentLine := Strings[i];
          p := Pos('..', CurrentLine);
          p2 := Pos('; ', CurrentLine);
          if (p = 0) or (p2 = 0) or
            (not TryStrToInt('$' + Copy(CurrentLine, 1, p - 1), BlockBegin)) or
            (not TryStrToInt('$' + Copy(CurrentLine, p + 2, p2 - p - 2), BlockEnd)) then
            raise Exception.Create('Invalid UCBLOCKS database.');
          FBlocks[i].BlockBegin := BlockBegin;
          FBlocks[i].BlockEnd := BlockEnd;
          FBlocks[i].BlockName := Copy(CurrentLine, p2 + 2);
          if (FBMPBlockCount = 0) and (FBlocks[i].BlockBegin > $FFFF) then
            FBMPBlockCount := i;
        end;
      finally
        Free;
      end;
  finally
    RS.Free;
  end;
end;

function TUCD.SearchChrNames(const S: string; GetAll: Boolean): TArray<Integer>;
begin

  const SCAP = UpperCase(S);

  var RS := TResourceStream.Create(hInstance, U8CHRS, RT_RCDATA);
  try
    var L := TList<Integer>.Create;
    try
      var SL := TStringList.Create;
      try
        var cp: Integer;
        SL.NameValueSeparator := ';';
        SL.LoadFromStream(RS);
        for var i := 0 to SL.Count - 1 do
          if GetAll or (Pos(SCAP, SL.ValueFromIndex[i]) > 0) then
            if TryStrToInt('$' + SL.Names[i], cp) then
              L.Add(cp);
      finally
        SL.Free;
      end;
      Result := L.ToArray;
    finally
      L.Free;
    end;
  finally
    RS.Free;
  end;

end;

end.