UnicodeData.pas

File name
C:\Users\Andreas Rejbrand\Documents\Dev\UnicodeInfo\UnicodeData.pas
Date exported
Time exported
Formatting processor
TPascalFormattingProcessor
{******************************************************************************}
{                                                                              }
{ Rejbrand Unicode Database Interface Unit                                     }
{                                                                              }
{ Copyright © 2016 Andreas Rejbrand                                            }
{                                                                              }
{ http://english.rejbrand.se/                                                  }
{                                                                              }
{******************************************************************************}

unit UnicodeData;

interface

const
  UCDUnicodeVersion = '8';

type
  TIntegerArray = array of integer;

  TBlockInfo = record
    BlockBegin, BlockEnd: integer;
    BlockName: string;
  end;

  TUCD = record
  strict private
    FBlocks: array of TBlockInfo;
    FBMPBlockCount: integer;
    procedure NeedBlocks;
    function GetBlock(Index: integer): TBlockInfo;
    function GetBlockCount: integer;
  private
    function GetBMPBlockCount: integer;
  public
    function GetChrCodepointStr(const ACode: integer): string; overload;
    function GetChrCodepointStr(const ACode: char): string; overload;
    function GetChrName(const ACodepoint: integer): string; overload;
    function GetChrName(const ACodepoint: char): string; overload;
    function GetChrBlock(const ACodepoint: integer): string; overload;
    function GetChrBlock(const ACodepoint: char): string; overload;
    function SearchChrNames(const S: string; GetAll: boolean = false): TIntegerArray;
    property Blocks[Index: integer]: TBlockInfo read GetBlock;
    property BlockCount: integer read GetBlockCount;
    property BMPBlockCount: integer read GetBMPBlockCount;
  end;

var
  UCD: TUCD;

implementation

{$R UCD.res}

uses
  SysUtils, Classes, Windows, Math;

const
  U8CHRS = 'U8CHRS';
  U8BLOCKS = 'U8BLOCKS';

{ TUCD }

function TUCD.GetChrBlock(const ACodepoint: integer): string;
var
  i: Integer;
begin
  NeedBlocks;
  result := 'No_Block';
  for i := 0 to high(FBlocks) do
    if InRange(ACodepoint, FBlocks[i].BlockBegin, FBlocks[i].BlockEnd) then
      Exit(FBlocks[i].BlockName);
end;

function TUCD.GetChrName(const ACodepoint: integer): string;
var
  RS: TResourceStream;
begin
  result := '';
  RS := TResourceStream.Create(hInstance, U8CHRS, RT_RCDATA);
  try
    with TStringList.Create do
      try
        NameValueSeparator := ';';
        LoadFromStream(RS);
        result := Values[IntToHex(ACodepoint, 4)];
      finally
        Free;
      end;
  finally
    RS.Free;
  end;
end;

function TUCD.GetBlock(Index: integer): TBlockInfo;
begin
  NeedBlocks;
  result := FBlocks[Index];
end;

function TUCD.GetBlockCount: integer;
begin
  NeedBlocks;
  result := Length(FBlocks);
end;

function TUCD.GetBMPBlockCount: integer;
begin
  NeedBlocks;
  result := FBMPBlockCount;
end;

function TUCD.GetChrBlock(const ACodepoint: char): string;
begin
  result := GetChrBlock(ord(ACodepoint));
end;

function TUCD.GetChrCodepointStr(const ACode: char): string;
begin
  result := GetChrCodepointStr(ord(ACode));
end;

function TUCD.GetChrCodepointStr(const ACode: integer): string;
begin
  result := 'U+' + IntToHex(ACode, 4);
end;

function TUCD.GetChrName(const ACodepoint: char): string;
begin
  result := GetChrName(ord(ACodepoint));
end;

procedure TUCD.NeedBlocks;
var
  RS: TResourceStream;
  BlockBegin, BlockEnd: integer;
  i, p, p2: integer;
  CurrentLine: string;
begin
  if Length(FBlocks) > 0 then
    Exit;
  FBMPBlockCount := 0;
  RS := TResourceStream.Create(hInstance, U8BLOCKS, RT_RCDATA);
  try
    with TStringList.Create do
      try
        LoadFromStream(RS);
        SetLength(FBlocks, Count);
        for i := 0 to Count - 1 do
        begin
          CurrentLine := Strings[i];
          p := Pos('..', CurrentLine);
          p2 := Pos('; ', CurrentLine);
          if (p = 0) or (p2 = 0) or
            (not TryStrToInt('$' + Copy(CurrentLine, 1, p - 1), BlockBegin)) or
            (not TryStrToInt('$' + Copy(CurrentLine, p + 2, p2 - p - 2), BlockEnd)) then
            raise Exception.Create('Invalid UCBLOCKS database.');
          FBlocks[i].BlockBegin := BlockBegin;
          FBlocks[i].BlockEnd := BlockEnd;
          FBlocks[i].BlockName := Copy(CurrentLine, p2 + 2);
          if (FBMPBlockCount = 0) and (FBlocks[i].BlockBegin > $FFFF) then
            FBMPBlockCount := i;
        end;
      finally
        Free;
      end;
  finally
    RS.Free;
  end;
end;

function TUCD.SearchChrNames(const S: string; GetAll: boolean): TIntegerArray;
const
  ALLOC_BY = 128;
var
  ActualLength: integer;
  i: integer;

  procedure AddCodepoint(const C: integer);
  begin
    if ActualLength = Length(result) then
      SetLength(result, Length(result) + ALLOC_BY);

    result[ActualLength] := C;
    inc(ActualLength);
  end;

var
  RS: TResourceStream;
  SCAP: string;
  cp: integer;

begin
  ActualLength := 0;
  SetLength(result, 0);

  SCAP := UpperCase(S);

  RS := TResourceStream.Create(hInstance, U8CHRS, RT_RCDATA);
  try
    with TStringList.Create do
      try
        NameValueSeparator := ';';
        LoadFromStream(RS);
        for i := 0 to Count - 1 do
          if GetAll or (Pos(SCAP, ValueFromIndex[i]) > 0) then
            if TryStrToInt('$' + Names[i], cp) then
              AddCodepoint(cp);
      finally
        Free;
      end;
  finally
    RS.Free;
  end;

  SetLength(result, ActualLength);
end;

end.