2209 lines
61 KiB
ObjectPascal
2209 lines
61 KiB
ObjectPascal
{ *************************************************************************** }
|
|
{ }
|
|
{ EControl Syntax Editor SDK }
|
|
{ }
|
|
{ Copyright (c) 2004 - 2015 EControl Ltd., Zaharov Michael }
|
|
{ www.econtrol.ru }
|
|
{ support@econtrol.ru }
|
|
{ }
|
|
{ *************************************************************************** }
|
|
|
|
{$mode delphi}
|
|
{$define EC_UNICODE}
|
|
|
|
unit ecZRegExpr;
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, ecStrUtils,
|
|
{$IFDEF RE_DEBUG}ComCtrls,{$ENDIF}
|
|
Dialogs;
|
|
|
|
const
|
|
MaskModI = 1; // modifier /i bit in fModifiers
|
|
MaskModR = 2; // -"- /r
|
|
MaskModS = 4; // -"- /s
|
|
MaskModG = 8; // -"- /g
|
|
MaskModM = 16; // -"- /m
|
|
MaskModX = 32; // -"- /x
|
|
|
|
type
|
|
TecRegExpr = class(TPersistent)
|
|
private
|
|
FProgRoot: TObject;
|
|
FModifiers: Word;
|
|
FMatchOK: Boolean;
|
|
FCodePage: Cardinal;
|
|
FExpression: ecString;
|
|
FUnicodeCompiled: Boolean;
|
|
FModifiersStatic: Word;
|
|
procedure SetModifiers(const Value: Word);
|
|
function GetModifier(const Index: Integer): boolean;
|
|
function GetModifierStr: ecString;
|
|
procedure SetModifier(const Index: Integer; const Value: boolean);
|
|
procedure SetModifierStr(const Value: ecString);
|
|
function GetIsInvalid: Boolean;
|
|
function GetMatchLen(Idx: integer): integer;
|
|
function GetMatchPos(Idx: integer): integer;
|
|
function GetSubExprMatchCount: integer;
|
|
procedure SetCodePage(const Value: Cardinal);
|
|
procedure SetExpression(const Value: ecString);
|
|
procedure ClearRoot;
|
|
function IsEmpty: Boolean;
|
|
procedure ParseModifiers(const S: AnsiString; var Modifiers: Word);
|
|
public
|
|
constructor Create;
|
|
destructor Destroy; override;
|
|
procedure Assign(Source: TPersistent); override;
|
|
|
|
function Compile: Boolean; overload;
|
|
function Compile(AsUnicode: Boolean): Boolean; overload;
|
|
procedure Compile(const AExpression: AnsiString); overload;
|
|
procedure Compile(const AExpression: UCString); overload;
|
|
function Match(const InputString: UCString; var aPos: integer; Back: Boolean = False): Boolean; overload;
|
|
function MatchLength(const InputString: AnsiString; aPos: integer; Back: Boolean = False): integer; overload;
|
|
function MatchLength(const InputString: UCString; aPos: integer; Back: Boolean = False): integer; overload;
|
|
function GetMatch(const InputString: AnsiString; SubIdx: integer): AnsiString; overload;
|
|
function GetMatch(const InputString: UCString; SubIdx: integer): UCString; overload;
|
|
function Substitute (const InputString, ATemplate : ecString) : ecString;
|
|
|
|
property ModifierMask: Word read FModifiers write SetModifiers;
|
|
property ModifierI: boolean index MaskModI read GetModifier write SetModifier;
|
|
property ModifierR: boolean index MaskModR read GetModifier write SetModifier;
|
|
property ModifierS: boolean index MaskModS read GetModifier write SetModifier;
|
|
property ModifierG: boolean index MaskModG read GetModifier write SetModifier;
|
|
property ModifierM: boolean index MaskModM read GetModifier write SetModifier;
|
|
property ModifierX: boolean index MaskModX read GetModifier write SetModifier;
|
|
property IsInvalid: Boolean read GetIsInvalid;
|
|
|
|
property MatchPos[Idx: integer]: integer read GetMatchPos;
|
|
property MatchLen[Idx: integer]: integer read GetMatchLen;
|
|
property SubExprMatchCount: integer read GetSubExprMatchCount;
|
|
published
|
|
property Expression: ecString read FExpression write SetExpression;
|
|
property ModifierStr: ecString read GetModifierStr write SetModifierStr stored False;
|
|
property ModifierFlags: Word read FModifiers write FModifiers;
|
|
property ModifiersStatic: Word read FModifiersStatic write FModifiersStatic;
|
|
end;
|
|
|
|
type
|
|
TGetCustomCharClass = function(ClassCode: UCChar): Boolean;
|
|
TCheckCustomCharClass = function(ClassCode: UCChar; CharCode: Word; var IsInClass: Boolean): Boolean;
|
|
|
|
var
|
|
DefaultModifiers: integer = MaskModI or MaskModR or MaskModG or MaskModM or MaskModX;
|
|
GetCustomCharClassProc: TGetCustomCharClass = nil;
|
|
CheckCustomCharClassProc: TCheckCustomCharClass = nil;
|
|
|
|
const
|
|
// Error messages
|
|
zreUnexpectedEnd = 'Unexpected end of expression';
|
|
zreUnexpectedModifier = 'Unexpected modifier';
|
|
zreUnexpectedBracket = 'Unexpected bracket';
|
|
zreInvalidZeroWidth = 'Invalid zero-width expression';
|
|
|
|
{$IFDEF RE_DEBUG}
|
|
type TREDebugOnMatchProc = procedure(const TraceStr: string) of object;
|
|
var REDebugOnMatchProc: TREDebugOnMatchProc = nil;
|
|
procedure REDebugCompiledBuildTree(RE: TecRegExpr; TV: TTreeView);
|
|
{$ENDIF}
|
|
|
|
|
|
implementation
|
|
|
|
uses SysUtils, Contnrs, Math;
|
|
|
|
{$IFDEF RE_DEBUG}
|
|
var
|
|
LastNodeID: integer;
|
|
{$ENDIF}
|
|
|
|
type
|
|
TreSubExpr = class;
|
|
TreRootNode = class;
|
|
// Base node class
|
|
TRENodeBase = class
|
|
private
|
|
{$IFDEF RE_DEBUG}
|
|
FNodeId: integer;
|
|
{$ENDIF}
|
|
FLoopMin: integer; // repeat at least
|
|
FLoopMax: integer; // not more than
|
|
FNonGreedy: Boolean; // is greedy
|
|
FNext: TRENodeBase;
|
|
FOwner: TreSubExpr;
|
|
function GetRoot: TreRootNode;
|
|
protected
|
|
public
|
|
constructor Create(AOwner: TreSubExpr); virtual;
|
|
|
|
function Match(const InputString: UCString; var aPos: integer): integer; overload; virtual; abstract;
|
|
function BackMatch(const InputString: UCString; var aPos: integer): integer; overload; virtual; abstract;
|
|
|
|
property Next: TRENodeBase read FNext write FNext;
|
|
property Owner: TreSubExpr read FOwner;
|
|
property Root: TreRootNode read GetRoot;
|
|
end;
|
|
|
|
// Char set & String
|
|
TCharSetNode = class(TRENodeBase)
|
|
private
|
|
FIgnoreCase: Boolean;
|
|
FInvert: Boolean;
|
|
FCharRanges: UCString;
|
|
FCharSets: UCString;
|
|
FCharArray: UCString;
|
|
procedure AddChar(C: AnsiChar); overload;
|
|
procedure AddRange(st, en: AnsiChar); overload;
|
|
function HasChar(C: AnsiChar): Boolean; overload;
|
|
procedure AddChar(C: UCChar); overload;
|
|
procedure AddRange(st, en: UCChar); overload;
|
|
procedure AddSet(C: UCChar); overload;
|
|
function HasChar(C: UCChar): Boolean; overload;
|
|
public
|
|
function Match(const InputString: UCString; var aPos: integer): integer; override;
|
|
function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
|
|
end;
|
|
|
|
// Simple text
|
|
TCharSeqNode = class(TRENodeBase)
|
|
private
|
|
FIgnoreCase: Boolean;
|
|
FChar: UCChar;
|
|
public
|
|
function Match(const InputString: UCString; var aPos: integer): integer; override;
|
|
function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
|
|
end;
|
|
|
|
// Special ckecking
|
|
TSpecCheckNode = class(TRENodeBase)
|
|
private
|
|
FCheckType: UCChar;
|
|
public
|
|
function Match(const InputString: UCString; var aPos: integer): integer; override;
|
|
function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
|
|
end;
|
|
|
|
// Reference to sub expression
|
|
TRefNode = class(TRENodeBase)
|
|
private
|
|
FRef: integer;
|
|
FIgnoreCase: Boolean;
|
|
function GetExprStr(const InputString: AnsiString): AnsiString; overload;
|
|
function GetExprStr(const InputString: UCString): UCString; overload;
|
|
public
|
|
function Match(const InputString: UCString; var aPos: integer): integer; override;
|
|
function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
|
|
end;
|
|
|
|
// Base class of containers
|
|
TreListNodeBase = class(TRENodeBase)
|
|
private
|
|
FList: TList;
|
|
public
|
|
constructor Create(AOwner: TreSubExpr); override;
|
|
destructor Destroy; override;
|
|
procedure Clear; virtual;
|
|
end;
|
|
|
|
// Branch of sub expression
|
|
TreBranchNode = class(TreListNodeBase)
|
|
private
|
|
function GetClassChar(C: UCChar; Modifiers: Word): UCChar;
|
|
public
|
|
procedure Add(Node: TRENodeBase);
|
|
procedure Invert;
|
|
procedure Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); overload;
|
|
procedure Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); overload;
|
|
function Match(const InputString: UCString; var aPos: integer): integer; override;
|
|
function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
|
|
end;
|
|
|
|
// Zero-width testing
|
|
// (?=RE) - positive, ahead
|
|
// (?!RE) - negative, ahead
|
|
// (?<=RE) - positive, behind
|
|
// (?<!RE) - negative, behind
|
|
TZeroWidth = class(TRENodeBase)
|
|
private
|
|
FIsBack: Boolean;
|
|
FNegative: Boolean;
|
|
FBranch: TreBranchNode;
|
|
function DoResult(MatchRes: integer): integer;
|
|
public
|
|
function Match(const InputString: UCString; var aPos: integer): integer; override;
|
|
function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
|
|
destructor Destroy; override;
|
|
end;
|
|
|
|
|
|
// Select operation (list of TreListNodeBase) => sub expression
|
|
TreSubExpr = class(TreListNodeBase)
|
|
private
|
|
FStart: integer; // first char of sub expression
|
|
FEnd: integer; // first char after sub expression
|
|
public
|
|
constructor Create(AOwner: TreSubExpr); override;
|
|
procedure Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); overload;
|
|
procedure Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); overload;
|
|
function Match(const InputString: UCString; var aPos: integer): integer; override;
|
|
function BackMatch(const InputString: UCString; var aPos: integer): integer; override;
|
|
end;
|
|
|
|
// Root node
|
|
TreRootNode = class(TreSubExpr)
|
|
private
|
|
FSubExpr: TList;
|
|
FOwner: TecRegExpr;
|
|
public
|
|
constructor Create(AOwner: TecRegExpr); reintroduce;
|
|
destructor Destroy; override;
|
|
procedure Clear; override;
|
|
function MatchStr(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean; overload;
|
|
|
|
property Owner: TecRegExpr read FOwner;
|
|
end;
|
|
|
|
|
|
// =============================================================================
|
|
// Utils functions
|
|
// =============================================================================
|
|
|
|
function Min(a, b: integer): integer;
|
|
begin
|
|
if a < b then Result := a else Result := b;
|
|
end;
|
|
|
|
function GetAbsoluteNext(Node: TRENodeBase):TRENodeBase;
|
|
begin
|
|
Result := Node.Next;
|
|
if (Result = nil) and (Node.Owner <> nil) then
|
|
Result := GetAbsoluteNext(Node.Owner);
|
|
end;
|
|
|
|
function IsInRange(const RngType: UCChar; const C: AnsiChar): Boolean; overload;
|
|
begin
|
|
Result := False;
|
|
if not Assigned(CheckCustomCharClassProc) or
|
|
not CheckCustomCharClassProc(RngType, Ord(C), Result)
|
|
then
|
|
case RngType of
|
|
'w': Result := IsWordChar(C);
|
|
'W': Result := not IsWordChar(C);
|
|
'd': Result := IsDigitChar(C);
|
|
'D': Result := not IsDigitChar(C);
|
|
's': Result := IsSpaceChar(C);
|
|
'S': Result := not IsSpaceChar(C);
|
|
'h': Result := IsHexDigitChar(C);
|
|
'H': Result := not IsHexDigitChar(C);
|
|
'l': Result := IsAlphaChar(C);
|
|
'L': Result := not IsAlphaChar(C);
|
|
'c': Result := IsIdentChar(C);
|
|
'C': Result := not IsIdentChar(C);
|
|
'g': Result := IsIdentLetterChar(C);
|
|
'G': Result := not IsIdentLetterChar(C);
|
|
'k': Result := IsIdentDigitChar(C);
|
|
'K': Result := not IsIdentDigitChar(C);
|
|
end;
|
|
end;
|
|
|
|
function IsInRange(const RngType: UCChar; const C: UCChar): Boolean; overload;
|
|
begin
|
|
Result := False;
|
|
if not Assigned(CheckCustomCharClassProc) or
|
|
not CheckCustomCharClassProc(RngType, Ord(C), Result)
|
|
then
|
|
case RngType of
|
|
'w': Result := IsWordChar(C);
|
|
'W': Result := not IsWordChar(C);
|
|
'd': Result := IsDigitChar(C);
|
|
'D': Result := not IsDigitChar(C);
|
|
's': Result := IsSpaceChar(C);
|
|
'S': Result := not IsSpaceChar(C);
|
|
'h': Result := IsHexDigitChar(C);
|
|
'H': Result := not IsHexDigitChar(C);
|
|
'l': Result := IsAlphaChar(C);
|
|
'L': Result := not IsAlphaChar(C);
|
|
'c': Result := IsIdentChar(C);
|
|
'C': Result := not IsIdentChar(C);
|
|
'g': Result := IsIdentLetterChar(C);
|
|
'G': Result := not IsIdentLetterChar(C);
|
|
'k': Result := IsIdentDigitChar(C);
|
|
'K': Result := not IsIdentDigitChar(C);
|
|
end;
|
|
end;
|
|
|
|
function GetEscape(const Expression: AnsiString; var aPos: integer): UCChar; overload;
|
|
var strt: integer;
|
|
begin
|
|
Result := #0;
|
|
case Expression[aPos] of
|
|
't': Result := #$9;
|
|
'n': Result := #$A;
|
|
'r': Result := #$D;
|
|
'f': Result := #$C;
|
|
'a': Result := #$7;
|
|
'e': Result := #$1B;
|
|
'x': begin
|
|
inc(aPos);
|
|
if aPos > Length(Expression) then
|
|
raise Exception.Create('Invalid escape char');
|
|
if Expression[aPos] = '{' then
|
|
begin
|
|
inc(aPos);
|
|
strt := aPos;
|
|
while (aPos < Length(Expression)) and (Expression[aPos] <> '}') do
|
|
inc(aPos);
|
|
Result := UCChar(StrToInt('$' + Trim(string(Copy(Expression, strt, aPos - strt)))))
|
|
end else
|
|
begin
|
|
Result := UCChar(StrToInt('$' + string(Copy(Expression, aPos, 2))));
|
|
Inc(aPos);
|
|
end;
|
|
if Result = '' then
|
|
raise Exception.Create('Invalid HEX char');
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function GetEscape(const Expression: UCString; var aPos: integer): UCChar; overload;
|
|
var strt: integer;
|
|
begin
|
|
Result := #0;
|
|
case Expression[aPos] of
|
|
't': Result := #$9;
|
|
'n': Result := #$A;
|
|
'r': Result := #$D;
|
|
'f': Result := #$C;
|
|
'a': Result := #$7;
|
|
'e': Result := #$1B;
|
|
'x': begin
|
|
inc(aPos);
|
|
if aPos > Length(Expression) then
|
|
raise Exception.Create('Invalid escape char');
|
|
if Expression[aPos] = '{' then
|
|
begin
|
|
inc(aPos);
|
|
strt := aPos;
|
|
while (aPos < Length(Expression)) and (Expression[aPos] <> '}') do
|
|
inc(aPos);
|
|
Result := UCChar(StrToInt('$' + Trim(Copy(Expression, strt, aPos - strt))));
|
|
end else
|
|
begin
|
|
Result := UCChar(StrToInt('$' + Copy(Expression, aPos, 2)));
|
|
Inc(aPos);
|
|
end;
|
|
if Result = '' then
|
|
raise Exception.Create('Invalid HEX char');
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
{ TRENodeBase }
|
|
|
|
constructor TRENodeBase.Create(AOwner: TreSubExpr);
|
|
begin
|
|
inherited Create;
|
|
FOwner := AOwner;
|
|
FLoopMin := 1;
|
|
FLoopMax := 1;
|
|
{$IFDEF RE_DEBUG}
|
|
Inc(LastNodeID);
|
|
FNodeId := LastNodeID;
|
|
{$ENDIF}
|
|
end;
|
|
|
|
function TRENodeBase.GetRoot: TreRootNode;
|
|
var Node: TRENodeBase;
|
|
begin
|
|
Node := Self;
|
|
while Node.Owner <> nil do
|
|
Node := Node.Owner;
|
|
Result := Node as TreRootNode;
|
|
end;
|
|
|
|
{ TCharSetNode }
|
|
|
|
function TCharSetNode.HasChar(C: AnsiChar): Boolean;
|
|
var i, N, k: integer;
|
|
begin
|
|
N := Length(FCharRanges);
|
|
if N > 0 then
|
|
for i := 1 to N shr 1 do
|
|
begin
|
|
k := i shl 1;
|
|
if (Ord(FCharRanges[k]) >= Ord(C)) and
|
|
(Ord(FCharRanges[k - 1]) <= Ord(C)) then
|
|
begin
|
|
Result := True;
|
|
Exit;
|
|
end;
|
|
end;
|
|
for i := 1 to Length(FCharSets) do
|
|
if IsInRange(FCharSets[i], C) then
|
|
begin
|
|
Result := True;
|
|
Exit;
|
|
end;
|
|
Result := Pos(UCChar(C), FCharArray) <> 0;
|
|
end;
|
|
|
|
procedure TCharSetNode.AddChar(C: AnsiChar);
|
|
begin
|
|
if FIgnoreCase then
|
|
C := ecUpCase(C);
|
|
if not HasChar(C) then
|
|
FCharArray := FCharArray + C;
|
|
end;
|
|
|
|
procedure TCharSetNode.AddRange(st, en: AnsiChar);
|
|
begin
|
|
if FIgnoreCase then
|
|
begin
|
|
st := ecUpCase(st);
|
|
en := ecUpCase(en);
|
|
end;
|
|
FCharRanges := FCharRanges + UCChar(st) + UCChar(en);
|
|
end;
|
|
|
|
function TCharSetNode.HasChar(C: UCChar): Boolean;
|
|
var i, N, k: integer;
|
|
begin
|
|
N := Length(FCharRanges);
|
|
if N > 0 then
|
|
for i := 1 to N shr 1 do
|
|
begin
|
|
k := i shl 1;
|
|
if (Ord(FCharRanges[k]) >= Ord(C)) and
|
|
(Ord(FCharRanges[k - 1]) <= Ord(C)) then
|
|
begin
|
|
Result := True;
|
|
Exit;
|
|
end;
|
|
end;
|
|
for i := 1 to Length(FCharSets) do
|
|
if IsInRange(FCharSets[i], C) then
|
|
begin
|
|
Result := True;
|
|
Exit;
|
|
end;
|
|
Result := Pos(C, FCharArray) <> 0;
|
|
end;
|
|
|
|
procedure TCharSetNode.AddChar(C: UCChar);
|
|
begin
|
|
if FIgnoreCase then
|
|
C := ecUpCase(C);
|
|
if Pos(C, FCharArray) = 0 then
|
|
FCharArray := FCharArray + C;
|
|
end;
|
|
|
|
procedure TCharSetNode.AddRange(st, en: UCChar);
|
|
begin
|
|
if FIgnoreCase then
|
|
begin
|
|
st := ecUpCase(st);
|
|
en := ecUpCase(en);
|
|
end;
|
|
FCharRanges := FCharRanges + st + en;
|
|
end;
|
|
|
|
procedure TCharSetNode.AddSet(C: UCChar);
|
|
begin
|
|
FCharSets := FCharSets + C;
|
|
end;
|
|
|
|
function TCharSetNode.Match(const InputString: UCString; var aPos: integer): integer;
|
|
var C: UCChar;
|
|
b: Boolean;
|
|
begin
|
|
if aPos > Length(InputString) then
|
|
begin
|
|
Result := 0;
|
|
Exit;
|
|
end;
|
|
|
|
C := InputString[aPos];
|
|
if FIgnoreCase then C := ecUpCase(C);
|
|
b := HasChar(C);
|
|
if FInvert then
|
|
b := not b;
|
|
if b then Inc(aPos);
|
|
if b then Result := 1 else Result := 0;
|
|
end;
|
|
|
|
function TCharSetNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
|
|
var C: UCChar;
|
|
b: Boolean;
|
|
begin
|
|
if aPos <= 1 then
|
|
begin
|
|
Result := 0;
|
|
Exit;
|
|
end;
|
|
|
|
C := InputString[aPos - 1];
|
|
if FIgnoreCase then C := ecUpCase(C);
|
|
b := HasChar(C);
|
|
if FInvert then
|
|
b := not b;
|
|
if b then Dec(aPos);
|
|
if b then Result := 1 else Result := 0;
|
|
end;
|
|
|
|
{ TCharSeqNode }
|
|
|
|
function TCharSeqNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
|
|
var C: UCChar;
|
|
begin
|
|
Result := 0;
|
|
if aPos > 1 then
|
|
begin
|
|
C := InputString[aPos - 1];
|
|
if FIgnoreCase then
|
|
C := ecUpCase(C);
|
|
if C = FChar then
|
|
begin
|
|
Dec(aPos);
|
|
Result := 1;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TCharSeqNode.Match(const InputString: UCString; var aPos: integer): integer;
|
|
var C: UCChar;
|
|
begin
|
|
Result := 0;
|
|
if aPos <= Length(InputString) then
|
|
begin
|
|
C := InputString[aPos];
|
|
if FIgnoreCase then
|
|
C := ecUpCase(C);
|
|
if C = FChar then
|
|
begin
|
|
Inc(aPos, 1);
|
|
Result := 1;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
{ TSpecCheckNode }
|
|
|
|
function TSpecCheckNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
|
|
var C: UCChar;
|
|
b: Boolean;
|
|
begin
|
|
b := False;
|
|
if aPos <= 1 then
|
|
begin
|
|
if aPos < 1 then
|
|
begin
|
|
Result := 0;
|
|
Exit;
|
|
end;
|
|
C := #0;
|
|
end
|
|
else C := InputString[aPos - 1];
|
|
|
|
case FCheckType of
|
|
'^': b := (C = #0) or (C = #10) or
|
|
(C = #13) and (aPos <= Length(InputString)) and (InputString[aPos] <> #10);
|
|
'$': b := (aPos > Length(InputString)) or (InputString[aPos] = #13) or
|
|
(InputString[aPos] = #10) and (C <> #13);
|
|
'A': b := C = #0;
|
|
'Z': b := aPos > Length(InputString);
|
|
'b': b := IsWordBreak(aPos, InputString);
|
|
'B': b := not IsWordBreak(aPos, InputString);
|
|
'z': begin
|
|
b := IsLineBreakChar(C);
|
|
if b then
|
|
begin
|
|
Dec(aPos);
|
|
if (C = #10) and (aPos > 0) and (InputString[aPos] = #13) then
|
|
Dec(aPos);
|
|
end;
|
|
end;
|
|
else if C<> #0 then
|
|
begin
|
|
case FCheckType of
|
|
'.': b := True;
|
|
':': b := not IsLineBreakChar(C);
|
|
else b := IsInRange(FCheckType, C);
|
|
end;
|
|
if b then Dec(aPos);
|
|
end;
|
|
end;
|
|
if b then Result := 1 else Result := 0;
|
|
end;
|
|
|
|
function TSpecCheckNode.Match(const InputString: UCString; var aPos: integer): integer;
|
|
var C: UCChar;
|
|
b: Boolean;
|
|
begin
|
|
b := False;
|
|
if aPos > Length(InputString) then
|
|
begin
|
|
if aPos - 1 > Length(InputString) then
|
|
begin
|
|
Result := 0;
|
|
Exit;
|
|
end;
|
|
C := #0;
|
|
end
|
|
else C := InputString[aPos];
|
|
|
|
case FCheckType of
|
|
'^': b := (aPos = 1) or (InputString[aPos - 1] = #10) or
|
|
(InputString[aPos - 1] = #13) and (C <> #10);
|
|
'$': b := (C = #13) or (C = #10) and (aPos > 1) and (InputString[aPos - 1] <> #13) or (C = #0);
|
|
'A': b := aPos = 1;
|
|
'Z': b := C = #0;
|
|
'b': b := IsWordBreak(aPos, InputString);
|
|
'B': b := not IsWordBreak(aPos, InputString);
|
|
'z': begin
|
|
b := IsLineBreakChar(C);
|
|
if b then
|
|
begin
|
|
Inc(aPos);
|
|
if (C = #13) and (aPos <= Length(InputString)) and (InputString[aPos] = #10) then
|
|
Inc(aPos);
|
|
end;
|
|
end;
|
|
else if C<> #0 then
|
|
begin
|
|
case FCheckType of
|
|
'.': b := True;
|
|
':': b := not IsLineBreakChar(C);
|
|
else b := IsInRange(FCheckType, C);
|
|
end;
|
|
if b then Inc(aPos);
|
|
end;
|
|
end;
|
|
if b then Result := 1 else Result := 0;
|
|
end;
|
|
|
|
{ TRefNode }
|
|
|
|
function TRefNode.GetExprStr(const InputString: AnsiString): AnsiString;
|
|
var se: TreSubExpr;
|
|
l: integer;
|
|
begin
|
|
Result := '';
|
|
with Root do
|
|
if FSubExpr.Count > FRef then
|
|
begin
|
|
se := TreSubExpr(FSubExpr[FRef]);
|
|
l := abs(se.FEnd - se.FStart);
|
|
if (se.FStart > 0) and (se.FEnd > 0) then
|
|
Result := Copy(InputString, Min(se.FStart, se.FEnd), l);
|
|
end;
|
|
end;
|
|
|
|
function TRefNode.GetExprStr(const InputString: UCString): UCString;
|
|
var se: TreSubExpr;
|
|
l: integer;
|
|
begin
|
|
Result := '';
|
|
with Root do
|
|
if FSubExpr.Count > FRef then
|
|
begin
|
|
se := TreSubExpr(FSubExpr[FRef]);
|
|
l := abs(se.FEnd - se.FStart);
|
|
if (se.FStart > 0) and (se.FEnd > 0) then
|
|
Result := Copy(InputString, Min(se.FStart, se.FEnd), l);
|
|
end;
|
|
end;
|
|
|
|
function TRefNode.BackMatch(const InputString: UCString;
|
|
var aPos: integer): integer;
|
|
var S, S1: UCString;
|
|
L: integer;
|
|
b: Boolean;
|
|
begin
|
|
Result := 0;
|
|
S := GetExprStr(InputString);
|
|
L := Length(S);
|
|
if (L > 0) and (L < aPos) then
|
|
begin
|
|
S1 := Copy(InputString, aPos - L, L);
|
|
if FIgnoreCase then b := SameText(S, S1)
|
|
else b := S = S1;
|
|
if b then
|
|
begin
|
|
Dec(aPos, L);
|
|
Result := 1;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TRefNode.Match(const InputString: UCString;
|
|
var aPos: integer): integer;
|
|
var S, S1: UCString;
|
|
l: integer;
|
|
b: Boolean;
|
|
begin
|
|
Result := 0;
|
|
S := GetExprStr(InputString);
|
|
L := Length(S);
|
|
if (L > 0) and (L <= Length(InputString) - aPos + 1) then
|
|
begin
|
|
S1 := Copy(InputString, aPos, l);
|
|
if FIgnoreCase then b := SameText(S, S1)
|
|
else b := S = S1;
|
|
if b then
|
|
begin
|
|
Inc(aPos, L);
|
|
Result := 1;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
{ TZeroWidth }
|
|
|
|
destructor TZeroWidth.Destroy;
|
|
begin
|
|
FreeAndNil(FBranch);
|
|
inherited;
|
|
end;
|
|
|
|
function TZeroWidth.DoResult(MatchRes: integer): integer;
|
|
begin
|
|
if FNegative then
|
|
begin
|
|
If MatchRes = 0 then
|
|
Result := 2
|
|
else
|
|
Result := 0;
|
|
end else
|
|
Result := MatchRes;
|
|
end;
|
|
|
|
function TZeroWidth.BackMatch(const InputString: UCString;
|
|
var aPos: integer): integer;
|
|
var testPos: integer;
|
|
begin
|
|
testPos := aPos;
|
|
if FIsBack then
|
|
Result := DoResult(FBranch.Match(InputString, testPos))
|
|
else
|
|
Result := DoResult(FBranch.BackMatch(InputString, testPos));
|
|
end;
|
|
|
|
function TZeroWidth.Match(const InputString: UCString;
|
|
var aPos: integer): integer;
|
|
var testPos: integer;
|
|
begin
|
|
testPos := aPos;
|
|
if FIsBack then
|
|
Result := DoResult(FBranch.BackMatch(InputString, testPos))
|
|
else
|
|
Result := DoResult(FBranch.Match(InputString, testPos));
|
|
end;
|
|
|
|
{ TreListNodeBase }
|
|
|
|
procedure TreListNodeBase.Clear;
|
|
begin
|
|
FList.Clear;
|
|
end;
|
|
|
|
constructor TreListNodeBase.Create(AOwner: TreSubExpr);
|
|
begin
|
|
inherited;
|
|
FList := TObjectList.Create;
|
|
end;
|
|
|
|
destructor TreListNodeBase.Destroy;
|
|
begin
|
|
FList.Free;
|
|
inherited;
|
|
end;
|
|
|
|
{ TreBranchNode }
|
|
|
|
function TreBranchNode.GetClassChar(C: UCChar; Modifiers: Word): UCChar;
|
|
begin
|
|
if Assigned(GetCustomCharClassProc) and GetCustomCharClassProc(C) then
|
|
Result := C else // User defined character class
|
|
if (MaskModR and Modifiers) = 0 then
|
|
case C of
|
|
'w': Result := 'c';
|
|
'W': Result := 'C';
|
|
'd': Result := 'k';
|
|
'D': Result := 'K';
|
|
'l': Result := 'g';
|
|
'L': Result := 'G';
|
|
's', 'S', 'h', 'H', 'c', 'C', 'g', 'G', 'k', 'K': Result := C;
|
|
else Result := #0;
|
|
end else
|
|
case C of
|
|
'w', 'W', 'd', 'D', 'l', 'L',
|
|
's', 'S', 'h', 'H', 'c', 'C', 'g', 'G', 'k', 'K': Result := C;
|
|
else Result := #0;
|
|
end;
|
|
end;
|
|
|
|
procedure TreBranchNode.Add(Node: TRENodeBase);
|
|
begin
|
|
if FList.Count > 0 then
|
|
TRENodeBase(FList.Last).Next := Node;
|
|
FList.Add(Node);
|
|
end;
|
|
|
|
procedure TreBranchNode.Invert;
|
|
var i, N: integer;
|
|
NextLast: TRENodeBase;
|
|
begin
|
|
N := FList.Count;
|
|
if N = 0 then Exit;
|
|
NextLast := TRENodeBase(FList.Last).Next;
|
|
for i := 0 to (N div 2) - 1 do
|
|
FList.Exchange(i, N - 1 - i);
|
|
for i := 0 to N - 2 do
|
|
TRENodeBase(FList[i]).Next := TRENodeBase(FList[i + 1]);
|
|
TRENodeBase(FList.Last).Next := NextLast;
|
|
end;
|
|
|
|
// All characters are ANSI characters.
|
|
// UCChar is used only for holding char codes less 255
|
|
// This is made to have unified storage of regular expr. nodes
|
|
procedure TreBranchNode.Compile(const Expression: AnsiString; var aPos: integer;
|
|
Modifiers: Word);
|
|
var Len: integer;
|
|
sub: TreSubExpr;
|
|
C: UCChar;
|
|
|
|
function SafeInc(RaiseEx: Boolean = False): AnsiChar; // Increment position to the next significant char
|
|
begin
|
|
inc(aPos);
|
|
// Skip spaces and comments
|
|
if (Modifiers and MaskModX) <> 0 then
|
|
while (aPos <= Len) and (IsSpaceChar(Expression[aPos]) or (Expression[aPos] = '#')) do
|
|
begin
|
|
if Expression[aPos] = '#' then
|
|
begin
|
|
Inc(aPos);
|
|
while (aPos <= Len) and not IsLineBreakChar(Expression[aPos]) do
|
|
Inc(aPos);
|
|
end;
|
|
Inc(aPos);
|
|
end;
|
|
|
|
if aPos <= Len then Result := Expression[aPos] else
|
|
begin
|
|
Result := #0;
|
|
if RaiseEx then
|
|
raise Exception.Create(zreUnexpectedEnd);
|
|
end;
|
|
end;
|
|
|
|
function ReadNumber: integer;
|
|
var strt: integer;
|
|
begin
|
|
strt := aPos;
|
|
while (aPos <= Len) and (Expression[aPos] >= '0') and (Expression[aPos] <= '9') do
|
|
Inc(aPos);
|
|
if strt = aPos then
|
|
raise Exception.Create('Number is expected');
|
|
Result := StrToInt(Copy(Expression, strt, aPos - strt));
|
|
Dec(aPos);
|
|
end;
|
|
|
|
// Read repeaters for node
|
|
procedure ReadRepeaters(Node: TRENodeBase);
|
|
begin
|
|
if aPos > Len then Exit;// repeaters are optional
|
|
case SafeInc of
|
|
'+': Node.FLoopMax := -1;
|
|
'?': Node.FLoopMin := 0;
|
|
'*': begin
|
|
Node.FLoopMax := -1;
|
|
Node.FLoopMin := 0;
|
|
end;
|
|
'{': begin
|
|
SafeInc;
|
|
Node.FLoopMin := ReadNumber;
|
|
case SafeInc of
|
|
',': if SafeInc <> '}' then
|
|
begin
|
|
Node.FLoopMax := ReadNumber;
|
|
if SafeInc <> '}' then
|
|
raise Exception.Create('There is no closing "}"');
|
|
end
|
|
else
|
|
Node.FLoopMax := -1;
|
|
'}': Node.FLoopMax := Node.FLoopMin;
|
|
else raise Exception.Create('Invalid loop specifier');
|
|
end;
|
|
if (Node.FLoopMax >= 0) and (Node.FLoopMax < Node.FLoopMin) then
|
|
raise Exception.Create('Loop minimum must be less then loop maximum');
|
|
end;
|
|
else begin
|
|
Dec(aPos);
|
|
Exit;
|
|
end
|
|
end;
|
|
if SafeInc = '?' then Node.FNonGreedy := True else
|
|
begin
|
|
Node.FNonGreedy := (Modifiers and MaskModG) = 0;
|
|
Dec(aPos);
|
|
end;
|
|
end;
|
|
|
|
procedure AddCharSeq(const C: UCChar);
|
|
var csNode: TCharSeqNode;
|
|
begin
|
|
csNode := TCharSeqNode.Create(Owner);
|
|
csNode.FIgnoreCase := (Modifiers and MaskModI) <> 0;
|
|
if csNode.FIgnoreCase then csNode.FChar := UCChar(ecUpCase(AnsiChar(C)))
|
|
else csNode.FChar := C;
|
|
Add(csNode);
|
|
ReadRepeaters(csNode);
|
|
end;
|
|
|
|
procedure AddSpecNode(const C: UCChar; WithRepeat: Boolean = True);
|
|
var sn: TSpecCheckNode;
|
|
begin
|
|
sn := TSpecCheckNode.Create(Owner);
|
|
sn.FCheckType := C;
|
|
Add(sn);
|
|
if WithRepeat then ReadRepeaters(sn);
|
|
end;
|
|
|
|
function PickSetChar(cs: TCharSetNode): UCChar;
|
|
begin
|
|
Result := UCChar(Expression[aPos]);
|
|
if Result = '\' then
|
|
begin
|
|
Inc(aPos);
|
|
if aPos > Length(Expression) then
|
|
Exit;
|
|
Result := GetEscape(Expression, aPos);
|
|
if Result = #0 then
|
|
begin
|
|
Result := GetClassChar(UCChar(Expression[aPos]), Modifiers);
|
|
if Result = #0 then Result := UCChar(Expression[aPos])
|
|
else
|
|
begin
|
|
cs.AddSet(Result);
|
|
Result := #0;
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadCharSet;
|
|
var cs: TCharSetNode;
|
|
Cstrt, Cend: UCChar;
|
|
begin
|
|
cs := TCharSetNode.Create(Owner);
|
|
cs.FIgnoreCase := (Modifiers and MaskModI) <> 0;
|
|
Add(cs);
|
|
Cstrt := #0;
|
|
if SafeInc(True) = '^' then cs.FInvert := True
|
|
else Cstrt := PickSetChar(cs);
|
|
while SafeInc(True) <> ']' do
|
|
begin
|
|
if (Expression[aPos] = '-') and (Cstrt <> #0) then // Add Range
|
|
if SafeInc = ']' then
|
|
begin
|
|
cs.AddChar(Cstrt);
|
|
Cstrt := '-';
|
|
Break;
|
|
end else
|
|
begin
|
|
Cend := PickSetChar(cs);
|
|
if Cend = #0 then
|
|
begin
|
|
cs.AddChar(Cstrt);
|
|
Cstrt := '-';
|
|
end else
|
|
if Ord(Cend) < Ord(Cstrt) then
|
|
raise Exception.Create('Invalid set range') else
|
|
begin
|
|
// Extended russian support
|
|
cs.AddRange(Cstrt, Cend);
|
|
Cstrt := #0;
|
|
if SafeInc(True) = ']' then Break;
|
|
end;
|
|
end;
|
|
if Cstrt <> #0 then cs.AddChar(Cstrt);
|
|
Cstrt := PickSetChar(cs);
|
|
end;
|
|
if Cstrt <> #0 then cs.AddChar(Cstrt);
|
|
ReadRepeaters(cs);
|
|
end;
|
|
|
|
procedure AddRefNode(RefIdx: integer);
|
|
var rn: TRefNode;
|
|
begin
|
|
if TreRootNode(Root).FSubExpr.Count <= RefIdx then
|
|
raise Exception.Create('Invalid reference');
|
|
rn := TRefNode.Create(Owner);
|
|
rn.FRef := RefIdx;
|
|
rn.FIgnoreCase := (Modifiers and MaskModI) <> 0;
|
|
Add(rn);
|
|
ReadRepeaters(rn);
|
|
end;
|
|
|
|
procedure AddZeroWidth(IsBack: Boolean);
|
|
var Negative: Boolean;
|
|
Branch: TreBranchNode;
|
|
Node: TZeroWidth;
|
|
begin
|
|
case Expression[aPos] of
|
|
'!': Negative := True;
|
|
'=': Negative := False;
|
|
else
|
|
raise Exception.Create(zreInvalidZeroWidth);
|
|
end;
|
|
SafeInc(True);
|
|
Branch := TreBranchNode.Create(nil);
|
|
try
|
|
Branch.Compile(Expression, aPos, Modifiers);
|
|
Node := TZeroWidth.Create(Owner);
|
|
Node.FIsBack := IsBack;
|
|
Node.FNegative := Negative;
|
|
Node.FBranch := Branch;
|
|
Add(Node);
|
|
if IsBack then
|
|
Branch.Invert;
|
|
except
|
|
Branch.Free;
|
|
end;
|
|
end;
|
|
|
|
var tp: integer;
|
|
begin
|
|
Clear;
|
|
Len := Length(Expression);
|
|
Dec(aPos);
|
|
while aPos <= Len do
|
|
begin
|
|
case SafeInc of
|
|
')', '|', #0: Exit; // end of branch
|
|
'(': begin
|
|
if SafeInc = '?' then
|
|
begin // Change modifiers
|
|
case SafeInc(True) of
|
|
'<': begin
|
|
SafeInc(True);
|
|
AddZeroWidth(True);
|
|
end;
|
|
'=', '!': AddZeroWidth(False);
|
|
else
|
|
begin
|
|
tp := aPos;
|
|
repeat // skip comment
|
|
Inc(aPos);
|
|
until (aPos > Len) or (Expression[aPos] = ')');
|
|
if Expression[tp] <> '#' then
|
|
Root.Owner.ParseModifiers(Copy(Expression, tp, aPos - tp), Modifiers)
|
|
end;
|
|
end;
|
|
end else
|
|
begin // sub expression
|
|
sub := TreSubExpr.Create(Owner);
|
|
Add(sub);
|
|
sub.Compile(Expression, aPos, Modifiers);
|
|
if (aPos > Len) or (Expression[aPos] <> ')') then
|
|
raise Exception.Create('Do not closed sub expression');
|
|
ReadRepeaters(sub);
|
|
end;
|
|
end;
|
|
'[': begin // char set node
|
|
ReadCharSet;
|
|
end;
|
|
'^': if (Modifiers and MaskModM) = 0 then AddSpecNode('A', False) // begin of text
|
|
else AddSpecNode('^', False); // begin of line
|
|
'$': if (Modifiers and MaskModM) = 0 then AddSpecNode('Z', False) // end of text
|
|
else AddSpecNode('$', False); // end of line
|
|
'.': if (Modifiers and MaskModS) <> 0 then AddSpecNode('.') // all
|
|
else AddSpecNode(':'); // all without line separators
|
|
'\': begin
|
|
Inc(aPos);
|
|
if aPos > Len then C := '\'
|
|
else C := GetEscape(Expression, aPos);
|
|
if C <> #0 then AddCharSeq(C) else
|
|
begin
|
|
C := GetClassChar(UCChar(Expression[aPos]), Modifiers);
|
|
if C <> #0 then AddSpecNode(C) else
|
|
case Expression[aPos] of
|
|
'A', 'Z', 'b', 'B', 'z': AddSpecNode(UCChar(Expression[aPos]));
|
|
'1'..'9': AddRefNode(Ord(Expression[aPos])-Ord('0'));
|
|
else AddCharSeq(UCChar(Expression[aPos]));
|
|
end;
|
|
end;
|
|
end;
|
|
else AddCharSeq(UCChar(Expression[aPos])); // Simple char
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TreBranchNode.Compile(const Expression: UCString; var aPos: integer;
|
|
Modifiers: Word);
|
|
var Len: integer;
|
|
sub: TreSubExpr;
|
|
C: UCChar;
|
|
|
|
function SafeInc(RaiseEx: Boolean = False): UCChar; // Increment position to the next significant char
|
|
begin
|
|
inc(aPos);
|
|
// Skip spaces and comments
|
|
if (Modifiers and MaskModX) <> 0 then
|
|
while (aPos <= Len) and (IsSpaceChar(Expression[aPos]) or (Expression[aPos] = '#')) do
|
|
begin
|
|
if Expression[aPos] = '#' then
|
|
begin
|
|
Inc(aPos);
|
|
while (aPos <= Len) and not IsLineBreakChar(Expression[aPos]) do
|
|
Inc(aPos);
|
|
end;
|
|
Inc(aPos);
|
|
end;
|
|
|
|
if aPos <= Len then Result := Expression[aPos] else
|
|
begin
|
|
Result := #0;
|
|
if RaiseEx then
|
|
raise Exception.Create(zreUnexpectedEnd);
|
|
end;
|
|
end;
|
|
|
|
function ReadNumber: integer;
|
|
var strt: integer;
|
|
begin
|
|
strt := aPos;
|
|
while (aPos <= Len) and (Expression[aPos] >= '0') and (Expression[aPos] <= '9') do
|
|
Inc(aPos);
|
|
if strt = aPos then
|
|
raise Exception.Create('Number is expected');
|
|
Result := StrToInt(Copy(Expression, strt, aPos - strt));
|
|
Dec(aPos);
|
|
end;
|
|
|
|
// Read repeaters for node
|
|
procedure ReadRepeaters(Node: TRENodeBase);
|
|
begin
|
|
if aPos > Len then Exit;// repeaters are optional
|
|
case SafeInc of
|
|
'+': Node.FLoopMax := -1;
|
|
'?': Node.FLoopMin := 0;
|
|
'*': begin
|
|
Node.FLoopMax := -1;
|
|
Node.FLoopMin := 0;
|
|
end;
|
|
'{': begin
|
|
SafeInc;
|
|
Node.FLoopMin := ReadNumber;
|
|
case SafeInc of
|
|
',': if SafeInc <> '}' then
|
|
begin
|
|
Node.FLoopMax := ReadNumber;
|
|
if SafeInc <> '}' then
|
|
raise Exception.Create('There is no closing "}"');
|
|
end
|
|
else
|
|
Node.FLoopMax := -1;
|
|
'}': Node.FLoopMax := Node.FLoopMin;
|
|
else raise Exception.Create('Invalid loop specifier');
|
|
end;
|
|
if (Node.FLoopMax >= 0) and (Node.FLoopMax < Node.FLoopMin) then
|
|
raise Exception.Create('Loop minimum must be less then loop maximum');
|
|
end;
|
|
else begin
|
|
Dec(aPos);
|
|
Exit;
|
|
end
|
|
end;
|
|
if SafeInc = '?' then Node.FNonGreedy := True else
|
|
begin
|
|
Node.FNonGreedy := (Modifiers and MaskModG) = 0;
|
|
Dec(aPos);
|
|
end;
|
|
end;
|
|
|
|
procedure AddCharSeq(const C: UCChar);
|
|
var csNode: TCharSeqNode;
|
|
begin
|
|
csNode := TCharSeqNode.Create(Owner);
|
|
csNode.FIgnoreCase := (Modifiers and MaskModI) <> 0;
|
|
if csNode.FIgnoreCase then csNode.FChar := ecUpCase(C)
|
|
else csNode.FChar := C;
|
|
Add(csNode);
|
|
ReadRepeaters(csNode);
|
|
end;
|
|
|
|
procedure AddSpecNode(const C: UCChar; WithRepeat: Boolean = True);
|
|
var sn: TSpecCheckNode;
|
|
begin
|
|
sn := TSpecCheckNode.Create(Owner);
|
|
sn.FCheckType := C;
|
|
Add(sn);
|
|
if WithRepeat then ReadRepeaters(sn);
|
|
end;
|
|
|
|
function PickSetChar(cs: TCharSetNode): UCChar;
|
|
begin
|
|
Result := Expression[aPos];
|
|
if Result = '\' then
|
|
begin
|
|
Inc(aPos);
|
|
if aPos > Length(Expression) then
|
|
Exit;
|
|
Result := GetEscape(Expression, aPos);
|
|
if Result = #0 then
|
|
begin
|
|
Result := GetClassChar(Expression[aPos], Modifiers);
|
|
if Result = #0 then Result := Expression[aPos]
|
|
else
|
|
begin
|
|
cs.AddSet(Result);
|
|
Result := #0;
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadCharSet;
|
|
var cs: TCharSetNode;
|
|
Cstrt, Cend: UCChar;
|
|
begin
|
|
cs := TCharSetNode.Create(Owner);
|
|
cs.FIgnoreCase := (Modifiers and MaskModI) <> 0;
|
|
Add(cs);
|
|
Cstrt := #0;
|
|
if SafeInc(True) = '^' then cs.FInvert := True
|
|
else Cstrt := PickSetChar(cs);
|
|
while SafeInc(True) <> ']' do
|
|
begin
|
|
if (Expression[aPos] = '-') and (Cstrt <> #0) then // Add Range
|
|
if SafeInc = ']' then
|
|
begin
|
|
cs.AddChar(Cstrt);
|
|
Cstrt := '-';
|
|
Break;
|
|
end else
|
|
begin
|
|
Cend := PickSetChar(cs);
|
|
if Cend = #0 then
|
|
begin
|
|
cs.AddChar(Cstrt);
|
|
Cstrt := '-';
|
|
end else
|
|
if Ord(Cend) < Ord(Cstrt) then
|
|
raise Exception.Create('Invalid set range') else
|
|
begin
|
|
// Extended russian support
|
|
cs.AddRange(Cstrt, Cend);
|
|
Cstrt := #0;
|
|
if SafeInc(True) = ']' then Break;
|
|
end;
|
|
end;
|
|
if Cstrt <> #0 then cs.AddChar(Cstrt);
|
|
Cstrt := PickSetChar(cs);
|
|
end;
|
|
if Cstrt <> #0 then cs.AddChar(Cstrt);
|
|
ReadRepeaters(cs);
|
|
end;
|
|
|
|
procedure AddRefNode(RefIdx: integer);
|
|
var rn: TRefNode;
|
|
begin
|
|
if TreRootNode(Root).FSubExpr.Count <= RefIdx then
|
|
raise Exception.Create('Invalid reference');
|
|
rn := TRefNode.Create(Owner);
|
|
rn.FRef := RefIdx;
|
|
rn.FIgnoreCase := (Modifiers and MaskModI) <> 0;
|
|
Add(rn);
|
|
ReadRepeaters(rn);
|
|
end;
|
|
|
|
procedure AddZeroWidth(IsBack: Boolean);
|
|
var Negative: Boolean;
|
|
Branch: TreBranchNode;
|
|
Node: TZeroWidth;
|
|
begin
|
|
case Expression[aPos] of
|
|
'!': Negative := True;
|
|
'=': Negative := False;
|
|
else
|
|
raise Exception.Create(zreInvalidZeroWidth);
|
|
end;
|
|
SafeInc(True);
|
|
Branch := TreBranchNode.Create(nil);
|
|
try
|
|
Branch.Compile(Expression, aPos, Modifiers);
|
|
Node := TZeroWidth.Create(Owner);
|
|
Node.FIsBack := IsBack;
|
|
Node.FNegative := Negative;
|
|
Node.FBranch := Branch;
|
|
Add(Node);
|
|
if IsBack then
|
|
Branch.Invert;
|
|
except
|
|
Branch.Free;
|
|
end;
|
|
end;
|
|
|
|
var tp: integer;
|
|
begin
|
|
Clear;
|
|
Len := Length(Expression);
|
|
Dec(aPos);
|
|
while aPos <= Len do
|
|
begin
|
|
case SafeInc of
|
|
')', '|', #0: Exit; // end of branch
|
|
'(': begin
|
|
if SafeInc = '?' then
|
|
begin // Change modifiers
|
|
case SafeInc(True) of
|
|
'<': begin
|
|
SafeInc(True);
|
|
AddZeroWidth(True);
|
|
end;
|
|
'=', '!': AddZeroWidth(False);
|
|
else
|
|
begin
|
|
tp := aPos;
|
|
repeat // skip comment
|
|
Inc(aPos);
|
|
until (aPos > Len) or (Expression[aPos] = ')');
|
|
if Expression[tp] <> '#' then
|
|
Root.Owner.ParseModifiers(Copy(Expression, tp, aPos - tp), Modifiers)
|
|
end;
|
|
end;
|
|
end else
|
|
begin // sub expression
|
|
sub := TreSubExpr.Create(Owner);
|
|
Add(sub);
|
|
sub.Compile(Expression, aPos, Modifiers);
|
|
if (aPos > Len) or (Expression[aPos] <> ')') then
|
|
raise Exception.Create('Do not closed sub expression');
|
|
ReadRepeaters(sub);
|
|
end;
|
|
end;
|
|
'[': begin // char set node
|
|
ReadCharSet;
|
|
end;
|
|
'^': if (Modifiers and MaskModM) = 0 then AddSpecNode('A', False) // begin of text
|
|
else AddSpecNode('^', False); // begin of line
|
|
'$': if (Modifiers and MaskModM) = 0 then AddSpecNode('Z', False) // end of text
|
|
else AddSpecNode('$', False); // end of line
|
|
'.': if (Modifiers and MaskModS) <> 0 then AddSpecNode('.') // all
|
|
else AddSpecNode(':'); // all without line separators
|
|
'\': begin
|
|
Inc(aPos);
|
|
if aPos > Len then C := '\'
|
|
else C := GetEscape(Expression, aPos);
|
|
if C <> #0 then AddCharSeq(C) else
|
|
begin
|
|
C := GetClassChar(Expression[aPos], Modifiers);
|
|
if C <> #0 then AddSpecNode(C) else
|
|
case Expression[aPos] of
|
|
'A', 'Z', 'b', 'B', 'z': AddSpecNode(Expression[aPos]);
|
|
'1'..'9': AddRefNode(Ord(Expression[aPos])-Ord('0'));
|
|
else AddCharSeq(Expression[aPos]);
|
|
end;
|
|
end;
|
|
end;
|
|
else AddCharSeq(Expression[aPos]); // Simple char
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
// Main mtaching routine (recursive)
|
|
// Returns:
|
|
// 0 - does not match
|
|
// 1 - match the Node
|
|
// 2 - match The Node and all next nodes
|
|
function MatchNode(Node: TRENodeBase; const InputString: UCString;
|
|
var aPos: integer): integer; overload;
|
|
|
|
var save, k, sv, LastSucc, total, Success: integer;
|
|
IsBrEnd: Boolean;
|
|
|
|
begin
|
|
if Node = nil then
|
|
begin
|
|
Result := 2;
|
|
Exit;
|
|
end;
|
|
Result := 0;
|
|
|
|
// required minimum repeat
|
|
save := aPos;
|
|
Success := 0;
|
|
for k := 1 to node.FLoopMin do
|
|
begin
|
|
Success := Node.Match(InputString, aPos);
|
|
if Success = 0 then
|
|
begin
|
|
aPos := save;
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, 0]));
|
|
{$ENDIF}
|
|
Exit;
|
|
end;
|
|
end;
|
|
|
|
k := node.FLoopMin - 1;
|
|
// k := 0;
|
|
LastSucc := 0;
|
|
total := 0;
|
|
|
|
IsBrEnd := Assigned(Node.Owner) and
|
|
((Node.Next = nil) or (Node.Next.Owner <> Node.Owner));
|
|
repeat
|
|
Inc(k);
|
|
sv := aPos;
|
|
if IsBrEnd then Node.Owner.FEnd := aPos;
|
|
if Node is TreSubExpr then
|
|
begin
|
|
if Success <> 2 then
|
|
Success := MatchNode(GetAbsoluteNext(Node), InputString, aPos);
|
|
end else
|
|
if Node.Next <> nil then
|
|
Success := MatchNode(Node.Next, InputString, aPos) else
|
|
if IsBrEnd then
|
|
Success := MatchNode(GetAbsoluteNext(Node.Owner), InputString, aPos) else
|
|
Success := 2;
|
|
|
|
if Success = 2 then // success all next nodes
|
|
begin
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Success at %d; Node: %d; NextRes: %s',[sv, Node.FNodeID, IntToStr(Success)]));
|
|
{$ENDIF}
|
|
total := aPos;
|
|
LastSucc := sv;
|
|
aPos := LastSucc;
|
|
Result := Success;
|
|
if node.FNonGreedy then
|
|
Break; // for non GREEDY mode
|
|
end else
|
|
if IsBrEnd then
|
|
begin
|
|
total := sv;
|
|
Result := 1;
|
|
end else
|
|
if Success = 1 then
|
|
begin
|
|
aPos := sv;
|
|
total := sv;
|
|
Result := 1;
|
|
if node.FNonGreedy then
|
|
Break; // for non GREEDY mode
|
|
end;
|
|
|
|
if (node.FLoopMax > 0) and (k >= node.FLoopMax) then // check max limit
|
|
begin
|
|
if not IsBrEnd then //v2.36
|
|
Result := Success; //v2.33
|
|
Break;
|
|
end;
|
|
|
|
sv := aPos;
|
|
Success := Node.Match(InputString, aPos);
|
|
until (Success = 0) or (aPos = sv);
|
|
|
|
// if (node.FLoopMin > 0) and (k < node.FLoopMin) then
|
|
// Result := 0;
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, integer(Result)]));
|
|
{$ENDIF}
|
|
if Result > 0 then
|
|
begin
|
|
aPos := total;
|
|
// Save sub-expression result
|
|
if IsBrEnd and (LastSucc > 0) then
|
|
Node.Owner.FEnd := LastSucc;
|
|
end else
|
|
begin
|
|
aPos := save;
|
|
if IsBrEnd then Node.Owner.FEnd := 0;
|
|
end;
|
|
end;
|
|
|
|
// Main mtaching routine (recursive)
|
|
function BackMatchNode(Node: TRENodeBase; const InputString: UCString;
|
|
var aPos: integer): integer; overload;
|
|
|
|
var save, k, sv, LastSucc, total, Success: integer;
|
|
IsBrEnd: Boolean;
|
|
|
|
begin
|
|
if Node = nil then
|
|
begin
|
|
Result := 2;
|
|
Exit;
|
|
end;
|
|
Result := 0;
|
|
|
|
// required minimum repeat
|
|
save := aPos;
|
|
Success := 0;
|
|
for k := 1 to node.FLoopMin do
|
|
begin
|
|
Success := Node.BackMatch(InputString, aPos);
|
|
if Success = 0 then
|
|
begin
|
|
aPos := save;
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, 0]));
|
|
{$ENDIF}
|
|
Exit;
|
|
end;
|
|
end;
|
|
|
|
k := node.FLoopMin - 1;
|
|
LastSucc := 0;
|
|
total := 0;
|
|
|
|
IsBrEnd := Assigned(Node.Owner) and
|
|
((Node.Next = nil) or (Node.Next.Owner <> Node.Owner));
|
|
repeat
|
|
Inc(k);
|
|
sv := aPos;
|
|
if IsBrEnd then Node.Owner.FStart := aPos;
|
|
if Node is TreSubExpr then
|
|
begin
|
|
if Success <> 2 then
|
|
Success := BackMatchNode(GetAbsoluteNext(Node), InputString, aPos);
|
|
end else
|
|
if Node.Next <> nil then
|
|
Success := BackMatchNode(Node.Next, InputString, aPos) else
|
|
if IsBrEnd then
|
|
Success := BackMatchNode(GetAbsoluteNext(Node.Owner), InputString, aPos) else
|
|
Success := 2;
|
|
|
|
if Success = 2 then
|
|
begin
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Success at %d; Node: %d; NextRes: %s',[sv, Node.FNodeID, IntToStr(Success)]));
|
|
{$ENDIF}
|
|
total := aPos;
|
|
LastSucc := sv;
|
|
aPos := LastSucc;
|
|
Result := Success;
|
|
if node.FNonGreedy then Break; // for non GREEDY mode
|
|
end else
|
|
if IsBrEnd then
|
|
begin
|
|
total := sv;
|
|
Result := 1;
|
|
end else
|
|
if Success = 1 then
|
|
begin
|
|
aPos := sv;
|
|
total := sv;
|
|
Result := 1;
|
|
end;
|
|
|
|
if (node.FLoopMax > 0) and (k >= node.FLoopMax) then // check max limit
|
|
begin
|
|
if not IsBrEnd then //v2.36
|
|
Result := Success; //v2.33
|
|
Break;
|
|
end;
|
|
|
|
sv := aPos;
|
|
Success := Node.BackMatch(InputString, aPos);
|
|
until (Success = 0) or (aPos = sv);
|
|
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, integer(Result)]));
|
|
{$ENDIF}
|
|
if Result > 0 then
|
|
begin
|
|
aPos := total;
|
|
// Save sub-expression result
|
|
if IsBrEnd and (LastSucc > 0) then
|
|
Node.Owner.FStart := LastSucc;
|
|
end else
|
|
begin
|
|
aPos := save;
|
|
if IsBrEnd then Node.Owner.FStart := 0;
|
|
end;
|
|
end;
|
|
|
|
function TreBranchNode.Match(const InputString: UCString; var aPos: integer): integer;
|
|
begin
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Branch<: %d; Position: %d',[FNodeID, aPos]));
|
|
{$ENDIF}
|
|
if FList.Count = 0 then Result := 1
|
|
else Result := MatchNode(TRENodeBase(FList.First), InputString, aPos);
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Branch>: %d; Position: %d; Result: %d',[FNodeID, aPos, integer(Result)]));
|
|
{$ENDIF}
|
|
end;
|
|
|
|
function TreBranchNode.BackMatch(const InputString: UCString; var aPos: integer): integer;
|
|
begin
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Branch<: %d; Position: %d',[FNodeID, aPos]));
|
|
{$ENDIF}
|
|
if FList.Count = 0 then Result := 1
|
|
else Result := BackMatchNode(TRENodeBase(FList.First), InputString, aPos);
|
|
{$IFDEF RE_DEBUG}
|
|
if Assigned(REDebugOnMatchProc) then
|
|
REDebugOnMatchProc(Format('Branch>: %d; Position: %d; Result: %d',[FNodeID, aPos, integer(Result)]));
|
|
{$ENDIF}
|
|
end;
|
|
|
|
{ TreSubExpr }
|
|
|
|
procedure TreSubExpr.Compile(const Expression: AnsiString; var aPos: integer;
|
|
Modifiers: Word);
|
|
var Br: TreBranchNode;
|
|
begin
|
|
Dec(aPos);
|
|
repeat
|
|
Inc(aPos);
|
|
Br := TreBranchNode.Create(Self);
|
|
FList.Add(Br);
|
|
Br.Compile(Expression, aPos, Modifiers);
|
|
if Br.FList.Count = 0 then FList.Remove(Br);
|
|
until (aPos > Length(Expression)) or (Expression[aPos] = ')');
|
|
end;
|
|
|
|
procedure TreSubExpr.Compile(const Expression: UCString; var aPos: integer;
|
|
Modifiers: Word);
|
|
var Br: TreBranchNode;
|
|
begin
|
|
Dec(aPos);
|
|
repeat
|
|
Inc(aPos);
|
|
Br := TreBranchNode.Create(Self);
|
|
FList.Add(Br);
|
|
Br.Compile(Expression, aPos, Modifiers);
|
|
if Br.FList.Count = 0 then FList.Remove(Br);
|
|
until (aPos > Length(Expression)) or (Expression[aPos] = ')');
|
|
end;
|
|
|
|
constructor TreSubExpr.Create(AOwner: TreSubExpr);
|
|
begin
|
|
inherited;
|
|
if Owner <> nil then
|
|
TreRootNode(Root).FSubExpr.Add(Self);
|
|
end;
|
|
|
|
function TreSubExpr.Match(const InputString: UCString; var aPos: integer): integer;
|
|
var i: integer;
|
|
OldEnd, OldStart, CurRes, svPos: integer;
|
|
begin
|
|
OldEnd := FEnd;
|
|
OldStart := FStart;
|
|
FStart := aPos;
|
|
FEnd := 0;
|
|
Result := 0;
|
|
svPos := aPos;
|
|
for i := 0 to FList.Count - 1 do
|
|
begin
|
|
CurRes := TreBranchNode(FList[i]).Match(InputString, aPos);
|
|
if FNonGreedy then
|
|
begin
|
|
if CurRes > 0 then
|
|
begin
|
|
OldEnd := FEnd;
|
|
OldStart := FStart;
|
|
Result := CurRes;
|
|
aPos := svPos;
|
|
Break;
|
|
end;
|
|
end else
|
|
if CurRes > Result then
|
|
begin
|
|
OldEnd := FEnd;
|
|
OldStart := FStart;
|
|
Result := CurRes;
|
|
aPos := svPos;
|
|
if Result = 2 then
|
|
Break;
|
|
end;
|
|
end;
|
|
FEnd := OldEnd;
|
|
FStart := OldStart;
|
|
if (FEnd > 0) and (Result > 0) then
|
|
aPos := FEnd;
|
|
end;
|
|
|
|
function TreSubExpr.BackMatch(const InputString: UCString; var aPos: integer): integer;
|
|
var i: integer;
|
|
OldEnd, OldStart, CurRes, svPos: integer;
|
|
begin
|
|
OldEnd := FEnd;
|
|
OldStart := FStart;
|
|
FStart := 0;
|
|
FEnd := aPos;
|
|
Result := 0;
|
|
svPos := aPos;
|
|
for i := 0 to FList.Count - 1 do
|
|
begin
|
|
CurRes := TreBranchNode(FList[i]).BackMatch(InputString, aPos);
|
|
if FNonGreedy then
|
|
begin
|
|
if CurRes > 0 then
|
|
begin
|
|
OldEnd := FEnd;
|
|
OldStart := FStart;
|
|
Result := CurRes;
|
|
aPos := svPos;
|
|
Break;
|
|
end;
|
|
end else
|
|
if CurRes > Result then
|
|
begin
|
|
OldEnd := FEnd;
|
|
OldStart := FStart;
|
|
Result := CurRes;
|
|
aPos := svPos;
|
|
if Result = 2 then
|
|
Break;
|
|
end;
|
|
end;
|
|
FEnd := OldEnd;
|
|
FStart := OldStart;
|
|
if (FStart > 0) and (Result > 0) then
|
|
aPos := FStart;
|
|
end;
|
|
|
|
{ TreRootNode }
|
|
|
|
procedure TreRootNode.Clear;
|
|
begin
|
|
inherited;
|
|
FSubExpr.Clear;
|
|
FSubExpr.Add(Self);
|
|
end;
|
|
|
|
constructor TreRootNode.Create(AOwner: TecRegExpr);
|
|
begin
|
|
inherited Create(nil);
|
|
FOwner := AOwner;
|
|
FSubExpr := TList.Create;
|
|
FSubExpr.Add(Self);
|
|
end;
|
|
|
|
destructor TreRootNode.Destroy;
|
|
begin
|
|
FSubExpr.Free;
|
|
inherited;
|
|
end;
|
|
|
|
function TreRootNode.MatchStr(const InputString: UCString;
|
|
var aPos: integer; Back: Boolean): Boolean;
|
|
var i: integer;
|
|
begin
|
|
for i := 0 to FSubExpr.Count - 1 do
|
|
begin
|
|
TreSubExpr(FSubExpr[i]).FStart := -1;
|
|
TreSubExpr(FSubExpr[i]).FEnd := -1;
|
|
end;
|
|
if Back then
|
|
Result := BackMatchNode(Self, InputString, aPos) <> 0
|
|
else
|
|
Result := MatchNode(Self, InputString, aPos) <> 0;
|
|
end;
|
|
|
|
// =============================================================================
|
|
// Application Level
|
|
// =============================================================================
|
|
|
|
{ TecRegExpr }
|
|
|
|
constructor TecRegExpr.Create;
|
|
begin
|
|
inherited;
|
|
FModifiers := DefaultModifiers;
|
|
end;
|
|
|
|
destructor TecRegExpr.Destroy;
|
|
begin
|
|
FreeAndNil(FProgRoot);
|
|
inherited;
|
|
end;
|
|
|
|
procedure TecRegExpr.ClearRoot;
|
|
begin
|
|
FreeAndNil(FProgRoot);
|
|
FMatchOK := False;
|
|
end;
|
|
|
|
function TecRegExpr.IsEmpty: Boolean;
|
|
begin
|
|
Result := not Assigned(FProgRoot) or (TreRootNode(FProgRoot).FList.Count = 0);
|
|
end;
|
|
|
|
procedure TecRegExpr.Compile(const AExpression: AnsiString);
|
|
var Pos: integer;
|
|
begin
|
|
{$IFDEF RE_DEBUG} LastNodeID := 0; {$ENDIF}
|
|
FMatchOK := False;
|
|
if not Assigned(FProgRoot) then
|
|
FProgRoot := TreRootNode.Create(Self)
|
|
else
|
|
TreRootNode(FProgRoot).Clear;
|
|
|
|
FUnicodeCompiled := False;
|
|
Pos := 1;
|
|
try
|
|
if AExpression <> '' then
|
|
TreRootNode(FProgRoot).Compile(AExpression, Pos, FModifiers)
|
|
except
|
|
ClearRoot;
|
|
raise;
|
|
end;
|
|
end;
|
|
|
|
procedure TecRegExpr.Compile(const AExpression: UCString);
|
|
var Pos: integer;
|
|
begin
|
|
{$IFDEF RE_DEBUG} LastNodeID := 0; {$ENDIF}
|
|
FMatchOK := False;
|
|
if not Assigned(FProgRoot) then
|
|
FProgRoot := TreRootNode.Create(Self)
|
|
else
|
|
TreRootNode(FProgRoot).Clear;
|
|
|
|
Pos := 1;
|
|
try
|
|
if AExpression <> '' then
|
|
TreRootNode(FProgRoot).Compile(AExpression, Pos, FModifiers);
|
|
except
|
|
ClearRoot;
|
|
raise;
|
|
end;
|
|
FUnicodeCompiled := True;
|
|
end;
|
|
|
|
function TecRegExpr.Compile: Boolean;
|
|
begin
|
|
Result := Compile(True);
|
|
end;
|
|
|
|
function TecRegExpr.Compile(AsUnicode: Boolean): Boolean;
|
|
begin
|
|
try
|
|
if IsEmpty or (AsUnicode xor FUnicodeCompiled) then
|
|
Compile(FExpression);
|
|
except
|
|
end;
|
|
Result := not IsEmpty;
|
|
end;
|
|
|
|
function TecRegExpr.GetIsInvalid: Boolean;
|
|
begin
|
|
Result := not Compile(False);
|
|
end;
|
|
|
|
function TecRegExpr.GetModifier(const Index: Integer): boolean;
|
|
begin
|
|
Result := (FModifiers and Index) <> 0;
|
|
end;
|
|
|
|
function TecRegExpr.GetModifierStr: ecString;
|
|
const ModLet: ecString = 'irsgmx';
|
|
var s1, s2: string;
|
|
i: integer;
|
|
begin
|
|
s1 := ''; s2 := '';
|
|
for i := 0 to 5 do
|
|
if (FModifiers and (1 shl i)) <> 0 then
|
|
s1 := s1 + ModLet[i + 1]
|
|
else
|
|
s2 := s2 + ModLet[i + 1];
|
|
|
|
Result := '(?' + s1;
|
|
if s2 <> '' then
|
|
Result := Result + '-' + s2;
|
|
Result := Result + ')';
|
|
end;
|
|
|
|
function TecRegExpr.Match(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean;
|
|
begin
|
|
Result := Compile(True); // ensure compiling and validity
|
|
if Result then
|
|
begin
|
|
if aPos < 1 then
|
|
aPos := 1;
|
|
Result := TreRootNode(FProgRoot).MatchStr(InputString, aPos, Back);
|
|
FMatchOK := Result;
|
|
end;
|
|
end;
|
|
|
|
function TecRegExpr.MatchLength(const InputString: AnsiString;
|
|
aPos: integer; Back: Boolean): integer;
|
|
begin
|
|
Result := aPos;
|
|
if Match(InputString, aPos, Back) then
|
|
begin
|
|
if Back then
|
|
Result := Result - aPos
|
|
else
|
|
Result := aPos - Result;
|
|
end
|
|
else
|
|
Result := 0;
|
|
end;
|
|
|
|
function TecRegExpr.MatchLength(const InputString: UCString;
|
|
aPos: integer; Back: Boolean): integer;
|
|
begin
|
|
Result := aPos;
|
|
if Match(InputString, aPos, Back) then
|
|
begin
|
|
if Back then
|
|
Result := Result - aPos
|
|
else
|
|
Result := aPos - Result;
|
|
end
|
|
else
|
|
Result := 0;
|
|
end;
|
|
|
|
|
|
//AT fix for SetExpression
|
|
function _MultilineTextToOneline(const AText: string): string;
|
|
var
|
|
L: TStringList;
|
|
S: string;
|
|
i, j: integer;
|
|
begin
|
|
L:= TStringList.Create;
|
|
try
|
|
L.TextLineBreakStyle:= tlbsLF; //force LF
|
|
L.Text:= AText;
|
|
|
|
//delete comments "#nnnnnnnnnnn"
|
|
for i:= 0 to L.Count-1 do
|
|
begin
|
|
S:= L[i];
|
|
for j:= 1 to Length(S) do
|
|
if (S[j]='#') and ((j=1) or (S[j-1]<>'\')) then
|
|
begin
|
|
Delete(S, j, Maxint);
|
|
L[i]:= S;
|
|
Break
|
|
end;
|
|
end;
|
|
|
|
Result:= Trim(L.Text);
|
|
Result:= StringReplace(Result, #10, ' ', [rfReplaceAll]);
|
|
finally
|
|
FreeAndNil(L);
|
|
end;
|
|
end;
|
|
|
|
procedure TecRegExpr.SetExpression(const Value: ecString);
|
|
begin
|
|
FExpression:= _MultilineTextToOneline(Value); //AT
|
|
|
|
//if Pos('#', FExpression)>0 then
|
|
// showmessage('regex'#13+FExpression);
|
|
|
|
ClearRoot;
|
|
end;
|
|
|
|
procedure TecRegExpr.SetModifier(const Index: Integer;
|
|
const Value: boolean);
|
|
begin
|
|
if Value then FModifiers := FModifiers or Index
|
|
else FModifiers := FModifiers and not Index;
|
|
ClearRoot;
|
|
end;
|
|
|
|
procedure TecRegExpr.SetModifiers(const Value: Word);
|
|
begin
|
|
FModifiers := Value;
|
|
ClearRoot;
|
|
end;
|
|
|
|
procedure TecRegExpr.SetModifierStr(const Value: ecString);
|
|
begin
|
|
if (Length(Value) >= 3) and (Copy(Value, 1, 2) = '(?') then
|
|
ParseModifiers(AnsiString(Copy(Value, 3, Length(Value) - 3)), FModifiers);
|
|
end;
|
|
|
|
function TecRegExpr.GetMatchLen(Idx: integer): integer;
|
|
begin
|
|
Result := -1;
|
|
if FMatchOK then
|
|
with TreRootNode(FProgRoot) do
|
|
if (idx < FSubExpr.Count) then
|
|
with TreSubExpr(FSubExpr[Idx]) do
|
|
if (FStart <> -1) and (FEnd <> -1) then
|
|
Result := FEnd - FStart;
|
|
end;
|
|
|
|
function TecRegExpr.GetMatchPos(Idx: integer): integer;
|
|
begin
|
|
Result := -1;
|
|
if FMatchOK then
|
|
with TreRootNode(FProgRoot) do
|
|
if (idx < FSubExpr.Count) then
|
|
with TreSubExpr(FSubExpr[Idx]) do
|
|
Result := FStart;
|
|
end;
|
|
|
|
function TecRegExpr.GetSubExprMatchCount: integer;
|
|
var i: integer;
|
|
begin
|
|
Result := -1;
|
|
if FMatchOK then
|
|
with TreRootNode(FProgRoot) do
|
|
for i := 0 to FSubExpr.Count - 1 do
|
|
with TreSubExpr(FSubExpr[i]) do
|
|
if (FStart <> -1) and (FEnd <> -1) then
|
|
Inc(Result);
|
|
end;
|
|
|
|
function TecRegExpr.GetMatch(const InputString: AnsiString;
|
|
SubIdx: integer): AnsiString;
|
|
begin
|
|
Result := '';
|
|
if FMatchOK then
|
|
with TreRootNode(FProgRoot) do
|
|
if (SubIdx < FSubExpr.Count) then
|
|
with TreSubExpr(FSubExpr[SubIdx]) do
|
|
if (FStart <> -1) and (FEnd <> -1) then
|
|
Result := Copy(InputString, FStart, FEnd - FStart);
|
|
end;
|
|
|
|
function TecRegExpr.GetMatch(const InputString: UCString;
|
|
SubIdx: integer): UCString;
|
|
begin
|
|
Result := '';
|
|
if FMatchOK then
|
|
with TreRootNode(FProgRoot) do
|
|
if (SubIdx < FSubExpr.Count) then
|
|
with TreSubExpr(FSubExpr[SubIdx]) do
|
|
if (FStart <> -1) and (FEnd <> -1) then
|
|
Result := Copy(InputString, FStart, FEnd - FStart);
|
|
end;
|
|
|
|
function TecRegExpr.Substitute(const InputString, ATemplate: ecString): ecString;
|
|
var i: integer;
|
|
C: ecChar;
|
|
begin
|
|
// if not FMatchOK then
|
|
// raise Exception.Create('No matched string');
|
|
Result := '';
|
|
i := 1;
|
|
while i <= Length(ATemplate) do
|
|
begin
|
|
if (ATemplate[i] = '\') and (i < Length(ATemplate)) then
|
|
begin
|
|
inc(i);
|
|
if IsDigitChar(ATemplate[i]) then
|
|
Result := Result + GetMatch(InputString, StrToInt(ATemplate[i]))
|
|
else
|
|
begin
|
|
C := ecChar(Ord(GetEscape(ATemplate, i)));
|
|
if C = #0 then C := ATemplate[i];
|
|
Result := Result + C;
|
|
end;
|
|
end else Result := Result + ATemplate[i];
|
|
inc(i);
|
|
end;
|
|
end;
|
|
|
|
procedure TecRegExpr.Assign(Source: TPersistent);
|
|
begin
|
|
if Source is TecRegExpr then
|
|
with Source as TecRegExpr do
|
|
begin
|
|
Self.Expression := Expression;
|
|
Self.ModifierMask := ModifierMask;
|
|
end
|
|
else inherited;
|
|
end;
|
|
|
|
procedure TecRegExpr.SetCodePage(const Value: Cardinal);
|
|
begin
|
|
if FCodePage <> Value then
|
|
begin
|
|
FCodePage := Value;
|
|
ClearRoot;
|
|
end;
|
|
end;
|
|
|
|
procedure TecRegExpr.ParseModifiers(const S: AnsiString; var Modifiers: Word);
|
|
var IsOn : boolean;
|
|
i: integer;
|
|
procedure SetModif(m: integer);
|
|
begin
|
|
if (m and FModifiersStatic) = 0 then
|
|
if IsOn then Modifiers := Modifiers or m
|
|
else Modifiers := Modifiers and not m;
|
|
end;
|
|
begin
|
|
IsOn := true;
|
|
for i := 1 to Length(S) do
|
|
case S[i] of
|
|
'-': IsOn := false;
|
|
'i','I': SetModif(MaskModI);
|
|
'r','R': SetModif(MaskModR);
|
|
's','S': SetModif(MaskModS);
|
|
'g','G': SetModif(MaskModG);
|
|
'm','M': SetModif(MaskModM);
|
|
'x','X': SetModif(MaskModX);
|
|
else
|
|
raise Exception.Create(zreUnexpectedModifier);
|
|
end;
|
|
end;
|
|
|
|
// =============================================================================
|
|
// DEBUGGER
|
|
// =============================================================================
|
|
{$IFDEF RE_DEBUG}
|
|
// Fill tree with compiled nodes {debug purpose}
|
|
procedure REDebugCompiledBuildTree(RE: TecRegExpr; TV: TTreeView);
|
|
function GetNodeCaption(Node: TRENodeBase): string;
|
|
begin
|
|
if Node.ClassType = TCharSeqNode then
|
|
Result := ecChar(TCharSeqNode(Node).FChar) else
|
|
if Node.ClassType = TCharSetNode then
|
|
begin
|
|
Result := '[ ... ]';
|
|
end else
|
|
if Node.ClassType = TSpecCheckNode then
|
|
Result := '! '+ TSpecCheckNode(Node).FCheckType + ' !' else
|
|
if Node.ClassType = TZeroWidth then
|
|
Result := '0'
|
|
else
|
|
Result := '<' + Node.ClassName + '>';
|
|
Result := IntToStr(Node.FNodeId) + ' ' + Result + Format(' {%d, %d}', [Node.FLoopMin, Node.FLoopMax]);
|
|
end;
|
|
|
|
procedure AddNode(Node: TRENodeBase; Prn: TTreeNode);
|
|
var tn: TTreeNode;
|
|
i: integer;
|
|
begin
|
|
tn := TV.Items.AddChild(Prn, GetNodeCaption(Node));
|
|
if Node is TreListNodeBase then
|
|
for i := 0 to TreListNodeBase(Node).FList.Count - 1 do
|
|
AddNode(TRENodeBase(TreListNodeBase(Node).FList[i]), tn);
|
|
if Node is TZeroWidth then
|
|
AddNode(TZeroWidth(Node).FBranch, tn);
|
|
end;
|
|
begin
|
|
TV.Items.Clear;
|
|
AddNode(TreRootNode(RE.FProgRoot), nil);
|
|
end;
|
|
{$ENDIF}
|
|
|
|
end.
|
|
|