416 lines
10 KiB
ObjectPascal
416 lines
10 KiB
ObjectPascal
unit DCConvertEncoding;
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, SysUtils;
|
|
|
|
var
|
|
|
|
{en
|
|
Convert from OEM to System encoding, if needed
|
|
}
|
|
CeOemToSys: function (const Source: String): String;
|
|
CeSysToOem: function (const Source: String): String;
|
|
|
|
{en
|
|
Convert from OEM to UTF-8 encoding, if needed
|
|
}
|
|
CeOemToUtf8: function (const Source: String): String;
|
|
CeUtf8ToOem: function (const Source: String): String;
|
|
|
|
{en
|
|
Convert from Ansi to System encoding, if needed
|
|
}
|
|
CeAnsiToSys: function (const Source: String): String;
|
|
CeSysToAnsi: function (const Source: String): String;
|
|
|
|
{en
|
|
Convert from ANSI to UTF-8 encoding, if needed
|
|
}
|
|
CeAnsiToUtf8: function (const Source: String): String;
|
|
CeUtf8ToAnsi: function (const Source: String): String;
|
|
|
|
{en
|
|
Convert from Utf8 to System encoding, if needed
|
|
}
|
|
CeUtf8ToSys: function (const Source: String): String;
|
|
CeSysToUtf8: function (const Source: String): String;
|
|
|
|
function CeRawToUtf8(const Source: String): String;
|
|
|
|
{$IF DEFINED(MSWINDOWS)}
|
|
function CeTryEncode(const aValue: UnicodeString; aCodePage: Cardinal;
|
|
aAllowBestFit: Boolean; out aResult: AnsiString): Boolean;
|
|
function CeTryDecode(const aValue: AnsiString; aCodePage: Cardinal;
|
|
out aResult: UnicodeString): Boolean;
|
|
{$ELSEIF DEFINED(UNIX)}
|
|
var
|
|
SystemEncodingUtf8: Boolean = False;
|
|
SystemLanguage, SystemEncoding: String;
|
|
{$ENDIF}
|
|
|
|
implementation
|
|
|
|
uses
|
|
{$IF DEFINED(UNIX)}
|
|
iconvenc_dyn
|
|
{$IF DEFINED(DARWIN)}
|
|
, MacOSAll
|
|
{$ENDIF}
|
|
{$ELSEIF DEFINED(MSWINDOWS)}
|
|
Windows
|
|
{$ENDIF}
|
|
;
|
|
|
|
function UTF8CharacterStrictLength(P: PAnsiChar): integer;
|
|
begin
|
|
if p=nil then exit(0);
|
|
if ord(p^)<%10000000 then begin
|
|
// regular single byte character
|
|
exit(1);
|
|
end
|
|
else if ord(p^)<%11000000 then begin
|
|
// invalid single byte character
|
|
exit(0);
|
|
end
|
|
else if ((ord(p^) and %11100000) = %11000000) then begin
|
|
// should be 2 byte character
|
|
if (ord(p[1]) and %11000000) = %10000000 then
|
|
exit(2)
|
|
else
|
|
exit(0);
|
|
end
|
|
else if ((ord(p^) and %11110000) = %11100000) then begin
|
|
// should be 3 byte character
|
|
if ((ord(p[1]) and %11000000) = %10000000)
|
|
and ((ord(p[2]) and %11000000) = %10000000) then
|
|
exit(3)
|
|
else
|
|
exit(0);
|
|
end
|
|
else if ((ord(p^) and %11111000) = %11110000) then begin
|
|
// should be 4 byte character
|
|
if ((ord(p[1]) and %11000000) = %10000000)
|
|
and ((ord(p[2]) and %11000000) = %10000000)
|
|
and ((ord(p[3]) and %11000000) = %10000000) then
|
|
exit(4)
|
|
else
|
|
exit(0);
|
|
end else
|
|
exit(0);
|
|
end;
|
|
|
|
function CeRawToUtf8(const Source: String): String;
|
|
var
|
|
P: PAnsiChar;
|
|
I, L: LongInt;
|
|
begin
|
|
L:= Length(Source);
|
|
// Try UTF-8 (this includes ASCII)
|
|
P:= PAnsiChar(Source);
|
|
repeat
|
|
if Ord(P^) < 128 then begin
|
|
// ASCII
|
|
if (P^ = #0) and (P - PAnsiChar(Source) >= L) then begin
|
|
Result:= Source;
|
|
Exit;
|
|
end;
|
|
Inc(P);
|
|
end else begin
|
|
I:= UTF8CharacterStrictLength(P);
|
|
if I = 0 then Break;
|
|
Inc(P, I);
|
|
end;
|
|
until False;
|
|
Result:= CeSysToUtf8(Source);
|
|
end;
|
|
|
|
function Dummy(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
end;
|
|
|
|
function Sys2UTF8(const Source: String): String;
|
|
begin
|
|
Result:= UTF8Encode(Source);
|
|
end;
|
|
|
|
function UTF82Sys(const Source: String): String;
|
|
begin
|
|
Result:= UTF8Decode(Source);
|
|
end;
|
|
|
|
{$IF DEFINED(MSWINDOWS)}
|
|
|
|
function CeTryEncode(const aValue: UnicodeString; aCodePage: Cardinal;
|
|
aAllowBestFit: Boolean; out aResult: AnsiString): Boolean;
|
|
// Try to encode the given Unicode string as the requested codepage
|
|
const
|
|
WC_NO_BEST_FIT_CHARS = $00000400;
|
|
Flags: array[Boolean] of DWORD = (WC_NO_BEST_FIT_CHARS, 0);
|
|
var
|
|
UsedDefault: BOOL;
|
|
begin
|
|
if not aAllowBestFit and not CheckWin32Version(4, 1) then
|
|
Result := False
|
|
else begin
|
|
SetLength(aResult, WideCharToMultiByte(aCodePage, Flags[aAllowBestFit],
|
|
PWideChar(aValue), Length(aValue), nil, 0, nil, @UsedDefault));
|
|
SetLength(aResult, WideCharToMultiByte(aCodePage, Flags[aAllowBestFit],
|
|
PWideChar(aValue), Length(aValue), PAnsiChar(aResult),
|
|
Length(aResult), nil, @UsedDefault));
|
|
Result := not UsedDefault;
|
|
end;
|
|
end;
|
|
|
|
function CeTryDecode(const aValue: AnsiString; aCodePage: Cardinal;
|
|
out aResult: UnicodeString): Boolean;
|
|
begin
|
|
SetLength(aResult, MultiByteToWideChar(aCodePage, MB_ERR_INVALID_CHARS,
|
|
LPCSTR(aValue), Length(aValue), nil, 0) * SizeOf(UnicodeChar));
|
|
SetLength(aResult, MultiByteToWideChar(aCodePage, MB_ERR_INVALID_CHARS,
|
|
LPCSTR(aValue), Length(aValue), PWideChar(aResult), Length(aResult)));
|
|
Result := Length(aResult) > 0;
|
|
end;
|
|
|
|
function Oem2Utf8(const Source: String): String;
|
|
var
|
|
UnicodeResult: UnicodeString;
|
|
begin
|
|
if CeTryDecode(Source, CP_OEMCP, UnicodeResult) then
|
|
Result:= UTF8Encode(UnicodeResult)
|
|
else
|
|
Result:= Source;
|
|
end;
|
|
|
|
function Utf82Oem(const Source: String): String;
|
|
var
|
|
AnsiResult: AnsiString;
|
|
begin
|
|
if CeTryEncode(UTF8Decode(Source), CP_OEMCP, False, AnsiResult) then
|
|
Result:= AnsiResult
|
|
else
|
|
Result:= Source;
|
|
end;
|
|
|
|
function OEM2Ansi(const Source: String): String;
|
|
var
|
|
Dst: PAnsiChar;
|
|
begin
|
|
Result:= Source;
|
|
Dst:= AllocMem((Length(Result) + 1) * SizeOf(AnsiChar));
|
|
if OEMToChar(PAnsiChar(Result), Dst) then
|
|
Result:= StrPas(Dst);
|
|
FreeMem(Dst);
|
|
end;
|
|
|
|
function Ansi2OEM(const Source: String): String;
|
|
var
|
|
Dst: PAnsiChar;
|
|
begin
|
|
Result := Source;
|
|
Dst := AllocMem((Length(Result) + 1) * SizeOf(AnsiChar));
|
|
if CharToOEM(PAnsiChar(Result), Dst) then
|
|
Result := StrPas(Dst);
|
|
FreeMem(Dst);
|
|
end;
|
|
|
|
procedure Initialize;
|
|
begin
|
|
CeOemToSys:= @OEM2Ansi;
|
|
CeSysToOem:= @Ansi2OEM;
|
|
CeOemToUtf8:= @Oem2Utf8;
|
|
CeUtf8ToOem:= @Utf82Oem;
|
|
CeAnsiToSys:= @Dummy;
|
|
CeSysToAnsi:= @Dummy;
|
|
CeAnsiToUtf8:= @Sys2UTF8;
|
|
CeUtf8ToAnsi:= @UTF82Sys;
|
|
CeSysToUtf8:= @Sys2UTF8;
|
|
CeUtf8ToSys:= @UTF82Sys;
|
|
end;
|
|
|
|
{$ELSEIF DEFINED(UNIX)}
|
|
|
|
const
|
|
EncodingUTF8 = 'UTF-8'; // UTF-8 Encoding
|
|
|
|
var
|
|
EncodingOEM, // OEM Encoding
|
|
EncodingANSI: String; // ANSI Encoding
|
|
|
|
function GetSystemEncoding(out Language, Encoding: String): Boolean;
|
|
{$IF DEFINED(DARWIN)}
|
|
var
|
|
LanguageCFArray: CFArrayRef = nil;
|
|
LanguageCFRef: CFStringRef = nil;
|
|
begin
|
|
LanguageCFArray:= CFLocaleCopyPreferredLanguages;
|
|
try
|
|
Result:= CFArrayGetCount(LanguageCFArray) > 0;
|
|
if Result then
|
|
begin
|
|
LanguageCFRef:= CFArrayGetValueAtIndex(LanguageCFArray, 0);
|
|
SetLength(Language, MAX_PATH);
|
|
Result:= CFStringGetCString(LanguageCFRef,
|
|
PAnsiChar(Language),
|
|
MAX_PATH,
|
|
kCFStringEncodingUTF8
|
|
);
|
|
if Result then
|
|
begin
|
|
Encoding:= EncodingUTF8;
|
|
Language:= Copy(Language, 1, 2);
|
|
end;
|
|
end;
|
|
finally
|
|
CFRelease(LanguageCFArray);
|
|
end;
|
|
end;
|
|
{$ELSE}
|
|
var
|
|
I: Integer;
|
|
Lang: String;
|
|
begin
|
|
Result:= True;
|
|
Lang:= SysUtils.GetEnvironmentVariable('LC_ALL');
|
|
if Length(Lang) = 0 then
|
|
begin
|
|
Lang:= SysUtils.GetEnvironmentVariable('LC_MESSAGES');
|
|
if Length(Lang) = 0 then
|
|
begin
|
|
Lang:= SysUtils.GetEnvironmentVariable('LANG');
|
|
if Length(Lang) = 0 then
|
|
Exit(False);
|
|
end;
|
|
end;
|
|
Language:= Copy(Lang, 1, 2);
|
|
I:= System.Pos('.', Lang);
|
|
if (I > 0) then
|
|
Encoding:= Copy(Lang, I + 1, Length(Lang) - I);
|
|
if Length(Encoding) = 0 then
|
|
Encoding:= EncodingUTF8;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
function Oem2Utf8(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
Iconvert(Source, Result, EncodingOEM, EncodingUTF8);
|
|
end;
|
|
|
|
function Utf82Oem(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
Iconvert(Source, Result, EncodingUTF8, EncodingOEM);
|
|
end;
|
|
|
|
function OEM2Sys(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
Iconvert(Source, Result, EncodingOEM, SystemEncoding);
|
|
end;
|
|
|
|
function Sys2OEM(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
Iconvert(Source, Result, SystemEncoding, EncodingOEM);
|
|
end;
|
|
|
|
function Ansi2Sys(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
Iconvert(Source, Result, EncodingANSI, SystemEncoding);
|
|
end;
|
|
|
|
function Sys2Ansi(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
Iconvert(Source, Result, SystemEncoding, EncodingANSI);
|
|
end;
|
|
|
|
function Ansi2Utf8(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
Iconvert(Source, Result, EncodingANSI, EncodingUTF8);
|
|
end;
|
|
|
|
function Utf82Ansi(const Source: String): String;
|
|
begin
|
|
Result:= Source;
|
|
Iconvert(Source, Result, EncodingUTF8, EncodingANSI);
|
|
end;
|
|
|
|
procedure Initialize;
|
|
var
|
|
Error: String;
|
|
begin
|
|
CeOemToSys:= @Dummy;
|
|
CeSysToOem:= @Dummy;
|
|
CeOemToUtf8:= @Dummy;
|
|
CeUtf8ToOem:= @Dummy;
|
|
CeAnsiToSys:= @Dummy;
|
|
CeSysToAnsi:= @Dummy;
|
|
CeUtf8ToSys:= @Dummy;
|
|
CeSysToUtf8:= @Dummy;
|
|
CeAnsiToUtf8:= @Dummy;
|
|
CeUtf8ToAnsi:= @Dummy;
|
|
|
|
// Try to get system encoding and initialize Iconv library
|
|
if not (GetSystemEncoding(SystemLanguage, SystemEncoding) and InitIconv(Error)) then
|
|
WriteLn(Error)
|
|
else
|
|
begin
|
|
SystemEncodingUtf8:= (SysUtils.CompareText(SystemEncoding, 'UTF-8') = 0) or
|
|
(SysUtils.CompareText(SystemEncoding, 'UTF8') = 0);
|
|
if (SystemLanguage = 'be') or (SystemLanguage = 'ru') or
|
|
(SystemLanguage = 'uk') then
|
|
begin
|
|
EncodingOEM:= 'CP866';
|
|
CeOemToSys:= @OEM2Sys;
|
|
CeSysToOem:= @Sys2OEM;
|
|
CeOemToUtf8:= @Oem2Utf8;
|
|
CeUtf8ToOem:= @Utf82Oem;
|
|
end;
|
|
if (SystemLanguage = 'be') or (SystemLanguage = 'bg') or
|
|
(SystemLanguage = 'ru') or (SystemLanguage = 'uk') then
|
|
begin
|
|
EncodingANSI:= 'CP1251';
|
|
CeAnsiToSys:= @Ansi2Sys;
|
|
CeSysToAnsi:= @Sys2Ansi;
|
|
CeAnsiToUtf8:= @Ansi2Utf8;
|
|
CeUtf8ToAnsi:= @Utf82Ansi;
|
|
end;
|
|
if not SystemEncodingUtf8 then
|
|
begin
|
|
CeUtf8ToSys:= @UTF82Sys;
|
|
CeSysToUtf8:= @Sys2UTF8;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
{$ELSE}
|
|
|
|
procedure Initialize;
|
|
begin
|
|
CeOemToSys:= @Dummy;
|
|
CeSysToOem:= @Dummy;
|
|
CeOemToUtf8:= @Dummy;
|
|
CeUtf8ToOem:= @Dummy;
|
|
CeAnsiToSys:= @Dummy;
|
|
CeSysToAnsi:= @Dummy;
|
|
CeUtf8ToSys:= @Dummy;
|
|
CeSysToUtf8:= @Dummy;
|
|
CeAnsiToUtf8:= @Dummy;
|
|
CeUtf8ToAnsi:= @Dummy;
|
|
end;
|
|
|
|
{$ENDIF}
|
|
|
|
initialization
|
|
Initialize;
|
|
|
|
end.
|