// SPDX-License-Identifier: LGPL-3.0-only (modified to allow linking) program parseunicodeclasses; uses Classes, sysutils, fgl, LazUTF8; type TIntegerList = specialize TFPGList; var UnicodeData: array of record Code: LongInt; Name, Category: string; CombiningClass: byte; BidiClass, Decomposition: string; Mirrored: boolean; OldName: string; end; UnicodeCount: integer; procedure LoadUnicodeData; var lines, cells: TStringList; i: Integer; begin lines := TStringList.Create; lines.LoadFromFile('UnicodeData.txt'); setlength(UnicodeData, lines.Count); UnicodeCount := 0; cells := TStringList.Create; cells.Delimiter := ';'; cells.QuoteChar := '"'; cells.StrictDelimiter := true; for i := 0 to lines.Count-1 do begin cells.DelimitedText := lines[i]; if cells.Count >= 11 then with UnicodeData[UnicodeCount] do begin Code := StrToInt('$'+cells[0]); Name := cells[1]; Category := cells[2]; CombiningClass:= StrToInt(cells[3]); BidiClass := cells[4]; Decomposition:= cells[5]; Mirrored := (cells[9] = 'Y'); OldName := cells[10]; inc(UnicodeCount); end; end; SetLength(UnicodeData, unicodeCount); lines.Free; end; function IndexOfUnicode(u: LongInt): integer; var low, high, mid: Integer; begin low := 0; high := UnicodeCount-1; while low < high do begin mid := (low+high) div 2; if u > UnicodeData[mid].Code then low := mid+1 else high := mid; end; if UnicodeData[low].Code = u then result := low else result := -1; end; function GetUnicodeBidiClass(u: LongInt): string; var idx: Integer; begin idx := IndexOfUnicode(u); if idx = -1 then result := '' else result := UnicodeData[idx].BidiClass; end; function GetUnicodeCombiningClass(u: LongInt): byte; var idx: Integer; begin idx := IndexOfUnicode(u); if idx = -1 then result := 0 else result := UnicodeData[idx].CombiningClass; end; function UnicodeCharToUTF8(u: LongInt): string; begin if u >= 0 then result := UnicodeToUTF8(cardinal(u)) else result := ''; end; function ArrayOfCodeToCase(ACodes: array of integer; AIndent: string): string; var codeCount, i: Integer; bufLines: TStringList; buf: String; begin codeCount := length(ACodes); if codeCount = 0 then exit(''); bufLines := TStringList.Create; i := 0; buf := AIndent+' '; while i < codeCount do begin if i > 0 then buf += ', '; if length(buf) > 95 then begin bufLines.Add(buf); buf := AIndent+' '; end; if (i+2 < codeCount) and (ACodes[i]+1 = ACodes[i+1]) and (ACodes[i+1]+1 = ACodes[i+2]) then begin buf += '$'+IntToHex(ACodes[i],2) + '..'; while (i+1 < codeCount) and (ACodes[i]+1 = ACodes[i+1]) do inc(i); buf += '$'+IntToHex(ACodes[i],2); end else buf += '$'+IntToHex(ACodes[i],2); inc(i); end; if trim(buf) <> '' then bufLines.Add(buf); result := ''; for i := 0 to bufLines.Count-1 do begin if i > 0 then result += LineEnding; result += bufLines[i]; end; bufLines.Free; result += ': '; end; function ArrayOfCodeToCase(ACodes: TIntegerList; AIndent: string): string; var a: array of integer; i: Integer; begin setlengtH(a, ACodes.Count); for i := 0 to high(a) do a[i] := ACodes[i]; result := ArrayOfCodeToCase(a, AIndent); end; procedure GenerateUnicodeFunctions; const Indent = ' '; var tIn, tOut: TextFile; procedure ParseBidiClasses; type TUnicodeBidiClass = (ubcBoundaryNeutral, ubcSegmentSeparator, ubcParagraphSeparator, ubcWhiteSpace, ubcOtherNeutrals, ubcCommonSeparator, ubcNonSpacingMark, ubcLeftToRight, ubcEuropeanNumber, ubcEuropeanNumberSeparator, ubcEuropeanNumberTerminator, ubcRightToLeft, ubcArabicLetter, ubcArabicNumber, ubcUnknown, ubcCombiningLeftToRight, //ubcLeftToRight in Mc category ubcMirroredNeutral); //ubcOtherNeutrals with Mirrored property procedure IncludeClasses(AClasses: TStrings; AMinCode, AMaxCode: integer); const MaxGapsPerClass = 20; var codes: array[TUnicodeBidiClass] of TIntegerList; gaps: array[TUnicodeBidiClass] of integer; procedure FlushCase(curBidi: TUnicodeBidiClass); var caseStr: string; begin if codes[curBidi].Count = 0 then exit; caseStr := ArrayOfCodeToCase(codes[curBidi], Indent); case curBidi of ubcCommonSeparator: WriteLn(tOut,caseStr+'result := ubcCommonSeparator;'); ubcLeftToRight: WriteLn(tOut,caseStr+'result := ubcLeftToRight;'); ubcCombiningLeftToRight: WriteLn(tOut,caseStr+'result := ubcCombiningLeftToRight;'); ubcEuropeanNumber: WriteLn(tOut,caseStr+'result := ubcEuropeanNumber;'); ubcEuropeanNumberSeparator: WriteLn(tOut,caseStr+'result := ubcEuropeanNumberSeparator;'); ubcEuropeanNumberTerminator: WriteLn(tOut,caseStr+'result := ubcEuropeanNumberTerminator;'); ubcRightToLeft: WriteLn(tOut,caseStr+'result := ubcRightToLeft;'); ubcArabicLetter: WriteLn(tOut,caseStr+'result := ubcArabicLetter;'); ubcArabicNumber: WriteLn(tOut,caseStr+'result := ubcArabicNumber;'); ubcNonSpacingMark: WriteLn(tOut,caseStr+'result := ubcNonSpacingMark;'); ubcBoundaryNeutral: WriteLn(tOut,caseStr+'result := ubcBoundaryNeutral;'); ubcParagraphSeparator: WriteLn(tOut,caseStr+'result := ubcParagraphSeparator;'); ubcSegmentSeparator: WriteLn(tOut,caseStr+'result := ubcSegmentSeparator;'); ubcWhiteSpace: WriteLn(tOut,caseStr+'result := ubcWhiteSpace;'); ubcMirroredNeutral: WriteLn(tOut,caseStr+'result := ubcMirroredNeutral;'); ubcOtherNeutrals: WriteLn(tOut,caseStr+'result := ubcOtherNeutrals;'); else raise exception.Create('Unknown bidi class'); end; codes[curBidi].Clear; gaps[curBidi] := 0; end; var newBidi: TUnicodeBidiClass; curCode: LongInt; i: integer; begin write(' ', AClasses.DelimitedText); for newBidi := low(TUnicodeBidiClass) to high(TUnicodeBidiClass) do begin codes[newBidi] := TIntegerList.Create; gaps[newBidi] := 0; end; for i := 0 to UnicodeCount-1 do begin case UnicodeData[i].BidiClass of 'CS': newBidi := ubcCommonSeparator; 'L': newBidi := ubcLeftToRight; 'EN': newBidi := ubcEuropeanNumber; 'ES': newBidi := ubcEuropeanNumberSeparator; 'ET': newBidi := ubcEuropeanNumberTerminator; 'R': newBidi := ubcRightToLeft; 'AL': newBidi := ubcArabicLetter; 'AN': newBidi := ubcArabicNumber; 'NSM': newBidi := ubcNonSpacingMark; 'BN': newBidi := ubcBoundaryNeutral; 'B': newBidi := ubcParagraphSeparator; 'S': newBidi := ubcSegmentSeparator; 'WS': newBidi := ubcWhiteSpace; 'ON': newBidi := ubcOtherNeutrals; else continue; end; if (newBidi = ubcLeftToRight) and (UnicodeData[i].Category = 'Mc') then newBidi := ubcCombiningLeftToRight else if (newBidi = ubcOtherNeutrals) and UnicodeData[i].Mirrored then newBidi := ubcMirroredNeutral; if AClasses.IndexOf(UnicodeData[i].BidiClass)<>-1 then begin curCode := UnicodeData[i].Code; if (curCode >= AMinCode) and (curCode <= AMaxCode) then begin if (codes[newBidi].Count > 0) and (codes[newBidi].Last+1 <> curCode) then inc(gaps[newBidi]); codes[newBidi].Add(curCode); if gaps[newBidi] > MaxGapsPerClass then FlushCase(newBidi); end; end; end; for newBidi := low(TUnicodeBidiClass) to high(TUnicodeBidiClass) do begin FlushCase(newBidi); codes[newBidi].Free; end; end; var c: TStringList; procedure Include(AMinCode,AMaxCode: integer); begin write('Classes from ',IntToHex(AMinCode,2),' to ',IntToHex(AMaxCode,2),':'); Writeln(tOut,Indent+'case u of'); c.CommaText := 'BN'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'S'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'B'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'WS'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'L,CL,R,AL'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'EN'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'ES'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'ET'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'AN'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'CS,NSM'; IncludeClasses(c, AMinCode,AMaxCode); c.CommaText := 'ON,MN'; IncludeClasses(c, AMinCode,AMaxCode); writeln(tout,Indent+'else result := ubcUnknown;'); writeln(tout,Indent+'end;'); writeln; end; begin Writeln(tOut,'function GetUnicodeBidiClassEx(u: LongWord): TUnicodeBidiClass;'); Writeln(tOut,'begin'); c := TStringList.Create; writeln(tOut,' case u of'); writeln(tOut,' $00000..$07FFF:'); writeln(tOut,' case u of'); writeln(tOut,' $00000..$003FF:'); Include($00000, $003FF); writeln(tOut,' $00400..$007FF:'); Include($00400, $007FF); writeln(tOut,' $00800..$00BFF:'); Include($00800, $00BFF); writeln(tOut,' $00C00..$00FFF:'); Include($00C00, $00FFF); writeln(tOut,' $01000..$017FF:'); Include($01000, $017FF); writeln(tOut,' $01800..$01FFF:'); Include($01800, $01FFF); writeln(tOut,' $02000..$02FFF:'); Include($02000, $02FFF); writeln(tOut,' else'); Include($03000, $07FFF); writeln(tOut,' end;'); writeln(tOut,' $08000..$0BFFF:'); Include($08000, $0BFFF); writeln(tOut,' $0C000..$0FFFF:'); Include($0C000, $0FFFF); writeln(tOut,' else'); writeln(tOut,' case u of'); writeln(tOut,' $10000..$107FF:'); Include($10000, $107FF); writeln(tOut,' $10800..$10FFF:'); Include($10800, $10FFF); writeln(tOut,' $11000..$117FF:'); Include($11000, $117FF); writeln(tOut,' $11800..$17FFF:'); Include($11800, $17FFF); writeln(tOut,' $18000..$1DFFF:'); Include($18000, $1DFFF); writeln(tOut,' $1E000..$FFFFF:'); Include($1E000, $FFFFF); writeln(tOut,' else result := ubcUnknown;'); writeln(tOut,' end'); writeln(tOut,' end'); c.Free; writeln(tout,'end;'); writeln(tout); end; procedure ParseBidiBrackets; var elem: TStringList; line: string; tIn: TextFile; begin Writeln(tOut,'function GetUnicodeBracketInfo(u: LongWord): TUnicodeBracketInfo;'); Writeln(tOut,' procedure Bracket(AOpening,AClosing: LongWord);'); Writeln(tOut,' begin'); Writeln(tOut,' result.IsBracket := true;'); Writeln(tOut,' result.OpeningBracket := AOpening;'); Writeln(tOut,' result.ClosingBracket := AClosing;'); Writeln(tOut,' end;'); Writeln(tOut,'begin'); Writeln(tOut,' case u of'); writeln('Parsing bracket data...'); assignfile(tIn, 'BidiBrackets.txt'); reset(tin); elem := TStringList.Create; elem.Delimiter := ';'; elem.StrictDelimiter:= true; while not eof(tin) do begin readln(tin, line); elem.DelimitedText:= line; if elem.Count >= 3 then begin if copy(trim(elem[2]),1,1) = 'o' then writeln(tOut,' $'+trim(elem[0])+', $'+trim(elem[1])+': Bracket($'+trim(elem[0])+', $'+trim(elem[1])+');'); end; end; elem.Free; closefile(tin); writeln(tout,' else'); writeln(tout,' begin'); writeln(tout,' result.IsBracket := false;'); writeln(tout,' result.OpeningBracket := 0;'); writeln(tout,' result.ClosingBracket := 0;'); writeln(tout,' end;'); Writeln(tOut,' end;'); Writeln(tOut,'end;'); Writeln(tOut); end; procedure ParseArabicLigature; var line: string; cells: TStringList; chars: TStringList; u: LongInt; j: Integer; procedure AddJoiningType(joinType: string; joinTypeEnum: string; AIndent: string; AMinIndex,AMaxIndex: integer); var i,nb: Integer; charsList: array of integer; begin nb := 0; for i := AMinIndex to AMaxIndex do if chars.ValueFromIndex[i]=joinType then inc(nb); if nb = 0 then exit; setlength(charsList, nb); nb := 0; for i := AMinIndex to AMaxIndex do if chars.ValueFromIndex[i]=joinType then begin charsList[nb] := StrToInt('$'+chars.Names[i]); inc(nb); end; writeln(tOut,ArrayOfCodeToCase(charsList, AIndent)+'result := '+joinTypeEnum+';'); end; procedure AddJoiningTypeRange(AMinIndex,AMaxIndex: integer; AIndent: string; AForceCase: boolean = false); const MaxGaps = 45; var mid, i, gaps, halfGaps: Integer; begin gaps := 0; for i := AMinIndex+1 to AMaxIndex do if (StrToInt('$'+chars.Names[i])-StrToInt('$'+chars.Names[i-1]) > 1) or (chars.ValueFromIndex[i] <> chars.ValueFromIndex[i-1]) then inc(gaps); if (gaps > MaxGaps) and not AForceCase then begin halfGaps := 0; mid := (AMinIndex+AMaxIndex) div 2; for i := AMinIndex+1 to AMaxIndex do if (StrToInt('$'+chars.Names[i])-StrToInt('$'+chars.Names[i-1]) > 1) or (chars.ValueFromIndex[i] <> chars.ValueFromIndex[i-1]) then begin inc(halfGaps); if halfGaps >= gaps shr 1 then begin mid := i; break; end; end; if gaps <= MaxGaps*2.5 then begin writeln(tOut,AIndent, 'if u <= $', chars.Names[mid],' then'); AddJoiningTypeRange(AMinIndex, mid, AIndent+' ', true); writeln(tOut,AIndent, 'else'); AddJoiningTypeRange(mid+1, AMaxIndex, AIndent+' ', true); end else begin writeln(tOut,AIndent, 'if u <= $', chars.Names[mid],' then begin'); AddJoiningTypeRange(AMinIndex, mid, AIndent+' '); writeln(tOut,AIndent, 'end else begin'); AddJoiningTypeRange(mid+1, AMaxIndex, AIndent+' '); writeln(tOut,AIndent, 'end'); end; end else begin writeln(tOut,AIndent, 'case u of'); AddJoiningType('T', 'ujtTransparent', AIndent, AMinIndex, AMaxIndex); AddJoiningType('R', 'ujtRightJoining', AIndent, AMinIndex, AMaxIndex); AddJoiningType('L', 'ujtLeftJoining', AIndent, AMinIndex, AMaxIndex); AddJoiningType('D', 'ujtDualJoining', AIndent, AMinIndex, AMaxIndex); AddJoiningType('C', 'ujtJoinCausing', AIndent, AMinIndex, AMaxIndex); writeln(tOut,AIndent, 'end'); end; end; begin writeln('Parsing arabic ligature data...'); chars := TStringList.Create; for j := 0 to UnicodeCount-1 do begin if (UnicodeData[j].Category = 'Mn') or (UnicodeData[j].Category = 'Me') or (UnicodeData[j].Category = 'Cf') then chars.Values[IntToHex(UnicodeData[j].Code,6)] := 'T'; end; assignfile(tIn, 'ArabicShaping.txt'); reset(tIn); cells := TStringList.Create; cells.Delimiter := ';'; cells.QuoteChar := '"'; cells.StrictDelimiter := true; while not eof(tIn) do begin readln(tIn, line); if (line = '') or (line[1]='#') then continue; cells.DelimitedText:= line; if cells.Count >= 4 then begin u := StrToInt('$'+cells[0]); if trim(cells[2]) = 'U' then begin j := chars.IndexOfName(IntToHex(u,6)); if j <> -1 then chars.Delete(j); end else chars.Values[IntToHex(u,6)] := trim(cells[2]); end; end; closefile(tIn); cells.Free; chars.Sort; Writeln(tOut,'function GetUnicodeJoiningType(u: LongWord): TUnicodeJoiningType;'); Writeln(tOut,'begin'); writeln(tOut,' result := ujtNonJoining;'); AddJoiningTypeRange(0, chars.Count-1, ' '); chars.Free; Writeln(tOut,'end;'); Writeln(tOut); end; procedure ParseCombiningClasses; const CombineLeftOnly = '093F,094E,' + {DEVANAGARI} '09BF,09C7,09C8,' + {BENGALI} '0A3F,' + {GURMUKHI} '0ABF,' + {GUJARATI} '0B47,0B48,0B4B,0B4C,' + {ORIYA} '0BC6,0BC7,0BC8,' + {TAMIL} '0D46,0D47,0D48,' + {MALAYALAM} '0DD9,0DDA,0DDB,0DDC,0DDD,0DDE,' + {SINHALA} '1031,103C,1084,' + {MYANMAR} '17BE,17C1,17C2,17C3,' + {KHMER} '1A19,' + {BUGINESE} '1B3E,1B3F,' + {BALINESE} '302E,302F,' + {HANGUL} 'A9BA,A9BB,A9BF,' + {JAVANESE} 'AA2F,AA30,AA34,'; {CHAM} CombineLeftAndRight = '09CB,09CC,' + {BENGALI} '0BCA,0BCB,0BCC,' + {TAMIL} '0D4A,0D4B,0D4C,' + {MALAYALAM} '17BF,17C0,17C4,17C5,' + {KHMER} '1B3D,1B40,1B41,'; {BALINESE} var i: Integer; infos: TStringList; u: LongInt; c: byte; s: String; procedure FlushLine; begin writeln(tOut, s); s := ' '; end; begin infos := TStringList.Create; for i := 0 to UnicodeCount-1 do begin u := UnicodeData[i].Code; if (UnicodeData[i].BidiClass = 'NSM') or (UnicodeData[i].Category = 'Mc') then begin c := UnicodeData[i].CombiningClass; if (c = 0) and (UnicodeData[i].Category = 'Mc') then begin if pos(IntToHex(u,4)+',', CombineLeftOnly) <> 0 then c := 208 else if pos(IntToHex(u,4)+',', CombineLeftAndRight) <> 0 then c := 0 else c := 210; end; infos.Add('(u:$'+IntToHex(u,2)+'; c:'+IntToStr(c)+')'); end; end; writeln(tOut,'type'); writeln(tOut,' TUnicodeCombiningInfo = record'); writeln(tOut,' u: LongWord;'); writeln(tOut,' c: Byte;'); writeln(tOut,' end;'); writeln(tOut,'const'); writeln(tOut,' UnicodeCombiningInfos: array[0..',infos.count-1,'] of TUnicodeCombiningInfo ='); s := ' ('; for i := 0 to infos.Count-1 do begin if length(s) + length(infos[i]) + 2 > 80 then FlushLine; AppendStr(s, ' ' + infos[i]); if i < infos.Count-1 then AppendStr(s, ','); end; if s <> ' ' then FlushLine; writeln(tOut,' );'); writeln(tOut); infos.Free; end; begin AssignFile(tOut, 'generatedunicode.inc'); Rewrite(tOut); writeln(tOut,'{ This file is generated by dev/parseunicode/parseunicodeclasses program }'); Writeln(tOut); ParseBidiClasses; ParseBidiBrackets; ParseArabicLigature; ParseCombiningClasses; CloseFile(tOut); end; function ListCompareBinary(List: TStringList; Index1, Index2: Integer): Integer; begin result := CompareStr(List[Index1], List[Index2]); end; procedure ParseUTF8Decomposition; type TDecompositionKind = string; const dMultichar = 'arNone'; dInitial = 'arInitial'; dMedial = 'arMedial'; dFinal = 'arFinal'; dIsolated = 'arIsolated'; const UTF8_ARABIC_ALEPH = 'ا'; UTF8_ARABIC_ALEPH_HAMZA_BELOW = 'إ'; UTF8_ARABIC_ALEPH_HAMZA_ABOVE = 'أ'; UTF8_ARABIC_ALEPH_MADDA_ABOVE = 'آ'; UTF8_ARABIC_LAM = 'ل'; var tOut: TextFile; decomposed, kind, decomposedUTF8, s: string; decomposedFirstChar: LongInt; mergedU,nextU, fallbackU: LongInt; posClose, posSpace: SizeInt; hasNSM, isLa: Boolean; correspList: TStringList; kerningFallback: TStringList; i, decomposedLen, j: Integer; typedKind: TDecompositionKind; hasMarkLeft, hasMarkRight: boolean; function RemoveUptoTab(AText: string): string; var idxTab: SizeInt; begin idxTab := pos(#9, AText); result := copy(AText, idxTab+1, length(AText)-idxTab); end; begin writeln('Parsing decomposition data...'); correspList := TStringList.Create; kerningFallback := TStringList.Create; for j := 0 to UnicodeCount-1 do begin mergedU := UnicodeData[j].Code; if UnicodeData[j].BidiClass = 'NSM' then continue; decomposed := UnicodeData[j].Decomposition; if decomposed = '' then continue; typedKind := dMultichar; if decomposed[1] = '<' then begin posClose := pos('>', decomposed); if posClose = 0 then continue; kind := copy(decomposed,1,posClose); delete(decomposed, 1, posClose); if kind = '' then typedKind := dInitial else if kind = '' then typedKind := dMedial else if kind = '' then typedKind := dFinal else if kind = '' then typedKind := dIsolated else if (kind = '') and (mergedU >= $FB00) and (mergedU <= $FB04) then typedKind := dMultichar else continue; decomposed := trim(decomposed); end; decomposedUTF8 := ''; decomposedLen := 0; decomposedFirstChar:= 0; hasMarkLeft := false; hasMarkRight := false; hasNSM := false; while decomposed <> '' do begin posSpace := pos(' ',decomposed); if posSpace = 0 then posSpace := length(decomposed)+1; nextU := strToInt('$'+copy(decomposed,1,posSpace-1)); if GetUnicodeBidiClass(nextU) = 'NSM' then hasNSM := true; case GetUnicodeCombiningClass(nextU) of 200,208,212,218,224,228: hasMarkLeft := true; 204,210,216,222,226,232: hasMarkRight := true; end; if decomposedLen = 0 then decomposedFirstChar:= nextU; AppendStr(decomposedUTF8, UnicodeCharToUTF8(nextU)); delete(decomposed, 1, posSpace); inc(decomposedLen); end; isLa := (decomposedUTF8 = UTF8_ARABIC_LAM+UTF8_ARABIC_ALEPH) or (decomposedUTF8 = UTF8_ARABIC_LAM+UTF8_ARABIC_ALEPH_HAMZA_BELOW) or (decomposedUTF8 = UTF8_ARABIC_LAM+UTF8_ARABIC_ALEPH_HAMZA_ABOVE) or (decomposedUTF8 = UTF8_ARABIC_LAM+UTF8_ARABIC_ALEPH_MADDA_ABOVE); if ((typedKind = dMultichar) and (decomposedLen > 1) and (hasNSM or (copy(decomposedUTF8,1,1) = 'f'))) or ((typedKind <> dMultichar) and ((decomposedLen = 1) or isLa)) then correspList.Add(decomposedUTF8+#9+'('+ 'de:''' + decomposedUTF8 + '''; ' + 're:''' + UnicodeCharToUTF8(mergedU) + '''; ' + 'join:' + typedKind + ')'); if (typedKind = dMultichar) and (decomposedUTF8 <> '') and not hasMarkLeft and not hasMarkRight and ((decomposedUTF8[1] in ['A'..'Z']) or (copy(decomposedUTF8,1,length('Æ')) = 'Æ') or (copy(decomposedUTF8,1,length('Ç')) = 'Ç') or (copy(decomposedUTF8,1,length('Г')) = 'Г') or (copy(decomposedUTF8,1,length('Ѵ')) = 'Ѵ') or (copy(decomposedUTF8,1,length('Ω')) = 'Ω') or (copy(decomposedUTF8,1,length('Ө')) = 'Ө')) then begin fallbackU := decomposedFirstChar; if fallbackU <> 32 then kerningFallback.Add('(u:$' + inttohex(mergedU,2)+'; fb:$'+ inttohex(fallbackU,2)+')'); end; end; AssignFile(tOut, 'generatedutf8.inc'); Rewrite(tOut); writeln(tOut,'{ This file is generated by dev/parseunicode/parseunicodeclasses program }'); writeln(tOut, 'type'); writeln(tOut, ' TArabicJoin = (arNone, arInitial, arMedial, arFinal, arIsolated);'); writeln(tOut, ' TUTF8Decomposition = record'); writeln(tOut, ' de, re: string; //decomposed, recomposed UTF8'); writeln(tOut, ' join: TArabicJoin;'); writeln(tOut, ' end;'); writeln(tOut, 'const'); writeln(tOut, ' UTF8Decomposition : array[0..', correspList.Count-1, '] of TUTF8Decomposition = ('); correspList.CustomSort(@ListCompareBinary); for i := 0 to correspList.Count-1 do if i <> correspList.Count-1 then writeln(tOut, ' ', RemoveUptoTab(correspList[i]), ',') else writeln(tOut, ' ', RemoveUptoTab(correspList[i])); correspList.Free; writeln(tOut, ' );'); writeln(tout); CloseFile(tOut); AssignFile(tOut, 'generatedkerningfallback.inc'); Rewrite(tOut); writeln(tOut,'{ This file is generated by dev/parseunicode/parseunicodeclasses program }'); writeln(tOut, 'type'); writeln(tOut, ' TKerningFallbackInfo = record'); writeln(tOut, ' u: integer; //composed charcode'); writeln(tOut, ' fb: integer; //fallback code'); writeln(tOut, ' end;'); writeln(tOut, 'const'); writeln(tOut, ' KerningFallbackInfo : array[0..', kerningFallback.Count-1, '] of TKerningFallbackInfo = ('); s := ''; for i := 0 to kerningFallback.Count-1 do begin if i <> kerningFallback.Count-1 then AppendStr(s, kerningFallback[i] + ', ') else AppendStr(s, kerningFallback[i]); if length(s) > 70 then begin writeln(tOut, ' ', s); s := ''; end; end; if s <> '' then writeln(tOut, ' ', s); writeln(tOut, ' );'); writeln(tout); kerningFallback.Free; CloseFile(tOut); end; begin LoadUnicodeData; GenerateUnicodeFunctions; ParseUTF8Decomposition; writeln('Done.'); end.