{ *************************************************************************** } { } { EControl Syntax Editor SDK } { } { Copyright (c) 2004 - 2015 EControl Ltd., Zaharov Michael } { www.econtrol.ru } { support@econtrol.ru } { } { *************************************************************************** } {$mode delphi} {$define EC_UNICODE} unit ecZRegExpr; interface uses Classes, ecStrUtils, {$IFDEF RE_DEBUG}ComCtrls,{$ENDIF} Dialogs; const MaskModI = 1; // modifier /i bit in fModifiers MaskModR = 2; // -"- /r MaskModS = 4; // -"- /s MaskModG = 8; // -"- /g MaskModM = 16; // -"- /m MaskModX = 32; // -"- /x type TecRegExpr = class(TPersistent) private FProgRoot: TObject; FModifiers: Word; FMatchOK: Boolean; FCodePage: Cardinal; FExpression: ecString; FUnicodeCompiled: Boolean; FModifiersStatic: Word; procedure SetModifiers(const Value: Word); function GetModifier(const Index: Integer): boolean; function GetModifierStr: ecString; procedure SetModifier(const Index: Integer; const Value: boolean); procedure SetModifierStr(const Value: ecString); function GetIsInvalid: Boolean; function GetMatchLen(Idx: integer): integer; function GetMatchPos(Idx: integer): integer; function GetSubExprMatchCount: integer; procedure SetCodePage(const Value: Cardinal); procedure SetExpression(const Value: ecString); procedure ClearRoot; function IsEmpty: Boolean; procedure ParseModifiers(const S: AnsiString; var Modifiers: Word); public constructor Create; destructor Destroy; override; procedure Assign(Source: TPersistent); override; function Compile: Boolean; overload; function Compile(AsUnicode: Boolean): Boolean; overload; procedure Compile(const AExpression: AnsiString); overload; procedure Compile(const AExpression: UCString); overload; function Match(const InputString: UCString; var aPos: integer; Back: Boolean = False): Boolean; overload; function MatchLength(const InputString: AnsiString; aPos: integer; Back: Boolean = False): integer; overload; function MatchLength(const InputString: UCString; aPos: integer; Back: Boolean = False): integer; overload; function GetMatch(const InputString: AnsiString; SubIdx: integer): AnsiString; overload; function GetMatch(const InputString: UCString; SubIdx: integer): UCString; overload; function Substitute (const InputString, ATemplate : ecString) : ecString; property ModifierMask: Word read FModifiers write SetModifiers; property ModifierI: boolean index MaskModI read GetModifier write SetModifier; property ModifierR: boolean index MaskModR read GetModifier write SetModifier; property ModifierS: boolean index MaskModS read GetModifier write SetModifier; property ModifierG: boolean index MaskModG read GetModifier write SetModifier; property ModifierM: boolean index MaskModM read GetModifier write SetModifier; property ModifierX: boolean index MaskModX read GetModifier write SetModifier; property IsInvalid: Boolean read GetIsInvalid; property MatchPos[Idx: integer]: integer read GetMatchPos; property MatchLen[Idx: integer]: integer read GetMatchLen; property SubExprMatchCount: integer read GetSubExprMatchCount; published property Expression: ecString read FExpression write SetExpression; property ModifierStr: ecString read GetModifierStr write SetModifierStr stored False; property ModifierFlags: Word read FModifiers write FModifiers; property ModifiersStatic: Word read FModifiersStatic write FModifiersStatic; end; type TGetCustomCharClass = function(ClassCode: UCChar): Boolean; TCheckCustomCharClass = function(ClassCode: UCChar; CharCode: Word; var IsInClass: Boolean): Boolean; var DefaultModifiers: integer = MaskModI or MaskModR or MaskModG or MaskModM or MaskModX; GetCustomCharClassProc: TGetCustomCharClass = nil; CheckCustomCharClassProc: TCheckCustomCharClass = nil; const // Error messages zreUnexpectedEnd = 'Unexpected end of expression'; zreUnexpectedModifier = 'Unexpected modifier'; zreUnexpectedBracket = 'Unexpected bracket'; zreInvalidZeroWidth = 'Invalid zero-width expression'; {$IFDEF RE_DEBUG} type TREDebugOnMatchProc = procedure(const TraceStr: string) of object; var REDebugOnMatchProc: TREDebugOnMatchProc = nil; procedure REDebugCompiledBuildTree(RE: TecRegExpr; TV: TTreeView); {$ENDIF} implementation uses SysUtils, Contnrs, Math; {$IFDEF RE_DEBUG} var LastNodeID: integer; {$ENDIF} type TreSubExpr = class; TreRootNode = class; // Base node class TRENodeBase = class private {$IFDEF RE_DEBUG} FNodeId: integer; {$ENDIF} FLoopMin: integer; // repeat at least FLoopMax: integer; // not more than FNonGreedy: Boolean; // is greedy FNext: TRENodeBase; FOwner: TreSubExpr; function GetRoot: TreRootNode; protected public constructor Create(AOwner: TreSubExpr); virtual; function Match(const InputString: UCString; var aPos: integer): integer; overload; virtual; abstract; function BackMatch(const InputString: UCString; var aPos: integer): integer; overload; virtual; abstract; property Next: TRENodeBase read FNext write FNext; property Owner: TreSubExpr read FOwner; property Root: TreRootNode read GetRoot; end; // Char set & String TCharSetNode = class(TRENodeBase) private FIgnoreCase: Boolean; FInvert: Boolean; FCharRanges: UCString; FCharSets: UCString; FCharArray: UCString; procedure AddChar(C: AnsiChar); overload; procedure AddRange(st, en: AnsiChar); overload; function HasChar(C: AnsiChar): Boolean; overload; procedure AddChar(C: UCChar); overload; procedure AddRange(st, en: UCChar); overload; procedure AddSet(C: UCChar); overload; function HasChar(C: UCChar): Boolean; overload; public function Match(const InputString: UCString; var aPos: integer): integer; override; function BackMatch(const InputString: UCString; var aPos: integer): integer; override; end; // Simple text TCharSeqNode = class(TRENodeBase) private FIgnoreCase: Boolean; FChar: UCChar; public function Match(const InputString: UCString; var aPos: integer): integer; override; function BackMatch(const InputString: UCString; var aPos: integer): integer; override; end; // Special ckecking TSpecCheckNode = class(TRENodeBase) private FCheckType: UCChar; public function Match(const InputString: UCString; var aPos: integer): integer; override; function BackMatch(const InputString: UCString; var aPos: integer): integer; override; end; // Reference to sub expression TRefNode = class(TRENodeBase) private FRef: integer; FIgnoreCase: Boolean; function GetExprStr(const InputString: AnsiString): AnsiString; overload; function GetExprStr(const InputString: UCString): UCString; overload; public function Match(const InputString: UCString; var aPos: integer): integer; override; function BackMatch(const InputString: UCString; var aPos: integer): integer; override; end; // Base class of containers TreListNodeBase = class(TRENodeBase) private FList: TList; public constructor Create(AOwner: TreSubExpr); override; destructor Destroy; override; procedure Clear; virtual; end; // Branch of sub expression TreBranchNode = class(TreListNodeBase) private function GetClassChar(C: UCChar; Modifiers: Word): UCChar; public procedure Add(Node: TRENodeBase); procedure Invert; procedure Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); overload; procedure Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); overload; function Match(const InputString: UCString; var aPos: integer): integer; override; function BackMatch(const InputString: UCString; var aPos: integer): integer; override; end; // Zero-width testing // (?=RE) - positive, ahead // (?!RE) - negative, ahead // (?<=RE) - positive, behind // (? sub expression TreSubExpr = class(TreListNodeBase) private FStart: integer; // first char of sub expression FEnd: integer; // first char after sub expression public constructor Create(AOwner: TreSubExpr); override; procedure Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); overload; procedure Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); overload; function Match(const InputString: UCString; var aPos: integer): integer; override; function BackMatch(const InputString: UCString; var aPos: integer): integer; override; end; // Root node TreRootNode = class(TreSubExpr) private FSubExpr: TList; FOwner: TecRegExpr; public constructor Create(AOwner: TecRegExpr); reintroduce; destructor Destroy; override; procedure Clear; override; function MatchStr(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean; overload; property Owner: TecRegExpr read FOwner; end; // ============================================================================= // Utils functions // ============================================================================= function Min(a, b: integer): integer; begin if a < b then Result := a else Result := b; end; function GetAbsoluteNext(Node: TRENodeBase):TRENodeBase; begin Result := Node.Next; if (Result = nil) and (Node.Owner <> nil) then Result := GetAbsoluteNext(Node.Owner); end; function IsInRange(const RngType: UCChar; const C: AnsiChar): Boolean; overload; begin Result := False; if not Assigned(CheckCustomCharClassProc) or not CheckCustomCharClassProc(RngType, Ord(C), Result) then case RngType of 'w': Result := IsWordChar(C); 'W': Result := not IsWordChar(C); 'd': Result := IsDigitChar(C); 'D': Result := not IsDigitChar(C); 's': Result := IsSpaceChar(C); 'S': Result := not IsSpaceChar(C); 'h': Result := IsHexDigitChar(C); 'H': Result := not IsHexDigitChar(C); 'l': Result := IsAlphaChar(C); 'L': Result := not IsAlphaChar(C); 'c': Result := IsIdentChar(C); 'C': Result := not IsIdentChar(C); 'g': Result := IsIdentLetterChar(C); 'G': Result := not IsIdentLetterChar(C); 'k': Result := IsIdentDigitChar(C); 'K': Result := not IsIdentDigitChar(C); end; end; function IsInRange(const RngType: UCChar; const C: UCChar): Boolean; overload; begin Result := False; if not Assigned(CheckCustomCharClassProc) or not CheckCustomCharClassProc(RngType, Ord(C), Result) then case RngType of 'w': Result := IsWordChar(C); 'W': Result := not IsWordChar(C); 'd': Result := IsDigitChar(C); 'D': Result := not IsDigitChar(C); 's': Result := IsSpaceChar(C); 'S': Result := not IsSpaceChar(C); 'h': Result := IsHexDigitChar(C); 'H': Result := not IsHexDigitChar(C); 'l': Result := IsAlphaChar(C); 'L': Result := not IsAlphaChar(C); 'c': Result := IsIdentChar(C); 'C': Result := not IsIdentChar(C); 'g': Result := IsIdentLetterChar(C); 'G': Result := not IsIdentLetterChar(C); 'k': Result := IsIdentDigitChar(C); 'K': Result := not IsIdentDigitChar(C); end; end; function GetEscape(const Expression: AnsiString; var aPos: integer): UCChar; overload; var strt: integer; begin Result := #0; case Expression[aPos] of 't': Result := #$9; 'n': Result := #$A; 'r': Result := #$D; 'f': Result := #$C; 'a': Result := #$7; 'e': Result := #$1B; 'x': begin inc(aPos); if aPos > Length(Expression) then raise Exception.Create('Invalid escape char'); if Expression[aPos] = '{' then begin inc(aPos); strt := aPos; while (aPos < Length(Expression)) and (Expression[aPos] <> '}') do inc(aPos); Result := UCChar(StrToInt('$' + Trim(string(Copy(Expression, strt, aPos - strt))))) end else begin Result := UCChar(StrToInt('$' + string(Copy(Expression, aPos, 2)))); Inc(aPos); end; if Result = '' then raise Exception.Create('Invalid HEX char'); end; end; end; function GetEscape(const Expression: UCString; var aPos: integer): UCChar; overload; var strt: integer; begin Result := #0; case Expression[aPos] of 't': Result := #$9; 'n': Result := #$A; 'r': Result := #$D; 'f': Result := #$C; 'a': Result := #$7; 'e': Result := #$1B; 'x': begin inc(aPos); if aPos > Length(Expression) then raise Exception.Create('Invalid escape char'); if Expression[aPos] = '{' then begin inc(aPos); strt := aPos; while (aPos < Length(Expression)) and (Expression[aPos] <> '}') do inc(aPos); Result := UCChar(StrToInt('$' + Trim(Copy(Expression, strt, aPos - strt)))); end else begin Result := UCChar(StrToInt('$' + Copy(Expression, aPos, 2))); Inc(aPos); end; if Result = '' then raise Exception.Create('Invalid HEX char'); end; end; end; { TRENodeBase } constructor TRENodeBase.Create(AOwner: TreSubExpr); begin inherited Create; FOwner := AOwner; FLoopMin := 1; FLoopMax := 1; {$IFDEF RE_DEBUG} Inc(LastNodeID); FNodeId := LastNodeID; {$ENDIF} end; function TRENodeBase.GetRoot: TreRootNode; var Node: TRENodeBase; begin Node := Self; while Node.Owner <> nil do Node := Node.Owner; Result := Node as TreRootNode; end; { TCharSetNode } function TCharSetNode.HasChar(C: AnsiChar): Boolean; var i, N, k: integer; begin N := Length(FCharRanges); if N > 0 then for i := 1 to N shr 1 do begin k := i shl 1; if (Ord(FCharRanges[k]) >= Ord(C)) and (Ord(FCharRanges[k - 1]) <= Ord(C)) then begin Result := True; Exit; end; end; for i := 1 to Length(FCharSets) do if IsInRange(FCharSets[i], C) then begin Result := True; Exit; end; Result := Pos(UCChar(C), FCharArray) <> 0; end; procedure TCharSetNode.AddChar(C: AnsiChar); begin if FIgnoreCase then C := ecUpCase(C); if not HasChar(C) then FCharArray := FCharArray + C; end; procedure TCharSetNode.AddRange(st, en: AnsiChar); begin if FIgnoreCase then begin st := ecUpCase(st); en := ecUpCase(en); end; FCharRanges := FCharRanges + UCChar(st) + UCChar(en); end; function TCharSetNode.HasChar(C: UCChar): Boolean; var i, N, k: integer; begin N := Length(FCharRanges); if N > 0 then for i := 1 to N shr 1 do begin k := i shl 1; if (Ord(FCharRanges[k]) >= Ord(C)) and (Ord(FCharRanges[k - 1]) <= Ord(C)) then begin Result := True; Exit; end; end; for i := 1 to Length(FCharSets) do if IsInRange(FCharSets[i], C) then begin Result := True; Exit; end; Result := Pos(C, FCharArray) <> 0; end; procedure TCharSetNode.AddChar(C: UCChar); begin if FIgnoreCase then C := ecUpCase(C); if Pos(C, FCharArray) = 0 then FCharArray := FCharArray + C; end; procedure TCharSetNode.AddRange(st, en: UCChar); begin if FIgnoreCase then begin st := ecUpCase(st); en := ecUpCase(en); end; FCharRanges := FCharRanges + st + en; end; procedure TCharSetNode.AddSet(C: UCChar); begin FCharSets := FCharSets + C; end; function TCharSetNode.Match(const InputString: UCString; var aPos: integer): integer; var C: UCChar; b: Boolean; begin if aPos > Length(InputString) then begin Result := 0; Exit; end; C := InputString[aPos]; if FIgnoreCase then C := ecUpCase(C); b := HasChar(C); if FInvert then b := not b; if b then Inc(aPos); if b then Result := 1 else Result := 0; end; function TCharSetNode.BackMatch(const InputString: UCString; var aPos: integer): integer; var C: UCChar; b: Boolean; begin if aPos <= 1 then begin Result := 0; Exit; end; C := InputString[aPos - 1]; if FIgnoreCase then C := ecUpCase(C); b := HasChar(C); if FInvert then b := not b; if b then Dec(aPos); if b then Result := 1 else Result := 0; end; { TCharSeqNode } function TCharSeqNode.BackMatch(const InputString: UCString; var aPos: integer): integer; var C: UCChar; begin Result := 0; if aPos > 1 then begin C := InputString[aPos - 1]; if FIgnoreCase then C := ecUpCase(C); if C = FChar then begin Dec(aPos); Result := 1; end; end; end; function TCharSeqNode.Match(const InputString: UCString; var aPos: integer): integer; var C: UCChar; begin Result := 0; if aPos <= Length(InputString) then begin C := InputString[aPos]; if FIgnoreCase then C := ecUpCase(C); if C = FChar then begin Inc(aPos, 1); Result := 1; end; end; end; { TSpecCheckNode } function TSpecCheckNode.BackMatch(const InputString: UCString; var aPos: integer): integer; var C: UCChar; b: Boolean; begin b := False; if aPos <= 1 then begin if aPos < 1 then begin Result := 0; Exit; end; C := #0; end else C := InputString[aPos - 1]; case FCheckType of '^': b := (C = #0) or (C = #10) or (C = #13) and (aPos <= Length(InputString)) and (InputString[aPos] <> #10); '$': b := (aPos > Length(InputString)) or (InputString[aPos] = #13) or (InputString[aPos] = #10) and (C <> #13); 'A': b := C = #0; 'Z': b := aPos > Length(InputString); 'b': b := IsWordBreak(aPos, InputString); 'B': b := not IsWordBreak(aPos, InputString); 'z': begin b := IsLineBreakChar(C); if b then begin Dec(aPos); if (C = #10) and (aPos > 0) and (InputString[aPos] = #13) then Dec(aPos); end; end; else if C<> #0 then begin case FCheckType of '.': b := True; ':': b := not IsLineBreakChar(C); else b := IsInRange(FCheckType, C); end; if b then Dec(aPos); end; end; if b then Result := 1 else Result := 0; end; function TSpecCheckNode.Match(const InputString: UCString; var aPos: integer): integer; var C: UCChar; b: Boolean; begin b := False; if aPos > Length(InputString) then begin if aPos - 1 > Length(InputString) then begin Result := 0; Exit; end; C := #0; end else C := InputString[aPos]; case FCheckType of '^': b := (aPos = 1) or (InputString[aPos - 1] = #10) or (InputString[aPos - 1] = #13) and (C <> #10); '$': b := (C = #13) or (C = #10) and (aPos > 1) and (InputString[aPos - 1] <> #13) or (C = #0); 'A': b := aPos = 1; 'Z': b := C = #0; 'b': b := IsWordBreak(aPos, InputString); 'B': b := not IsWordBreak(aPos, InputString); 'z': begin b := IsLineBreakChar(C); if b then begin Inc(aPos); if (C = #13) and (aPos <= Length(InputString)) and (InputString[aPos] = #10) then Inc(aPos); end; end; else if C<> #0 then begin case FCheckType of '.': b := True; ':': b := not IsLineBreakChar(C); else b := IsInRange(FCheckType, C); end; if b then Inc(aPos); end; end; if b then Result := 1 else Result := 0; end; { TRefNode } function TRefNode.GetExprStr(const InputString: AnsiString): AnsiString; var se: TreSubExpr; l: integer; begin Result := ''; with Root do if FSubExpr.Count > FRef then begin se := TreSubExpr(FSubExpr[FRef]); l := abs(se.FEnd - se.FStart); if (se.FStart > 0) and (se.FEnd > 0) then Result := Copy(InputString, Min(se.FStart, se.FEnd), l); end; end; function TRefNode.GetExprStr(const InputString: UCString): UCString; var se: TreSubExpr; l: integer; begin Result := ''; with Root do if FSubExpr.Count > FRef then begin se := TreSubExpr(FSubExpr[FRef]); l := abs(se.FEnd - se.FStart); if (se.FStart > 0) and (se.FEnd > 0) then Result := Copy(InputString, Min(se.FStart, se.FEnd), l); end; end; function TRefNode.BackMatch(const InputString: UCString; var aPos: integer): integer; var S, S1: UCString; L: integer; b: Boolean; begin Result := 0; S := GetExprStr(InputString); L := Length(S); if (L > 0) and (L < aPos) then begin S1 := Copy(InputString, aPos - L, L); if FIgnoreCase then b := SameText(S, S1) else b := S = S1; if b then begin Dec(aPos, L); Result := 1; end; end; end; function TRefNode.Match(const InputString: UCString; var aPos: integer): integer; var S, S1: UCString; l: integer; b: Boolean; begin Result := 0; S := GetExprStr(InputString); L := Length(S); if (L > 0) and (L <= Length(InputString) - aPos + 1) then begin S1 := Copy(InputString, aPos, l); if FIgnoreCase then b := SameText(S, S1) else b := S = S1; if b then begin Inc(aPos, L); Result := 1; end; end; end; { TZeroWidth } destructor TZeroWidth.Destroy; begin FreeAndNil(FBranch); inherited; end; function TZeroWidth.DoResult(MatchRes: integer): integer; begin if FNegative then begin If MatchRes = 0 then Result := 2 else Result := 0; end else Result := MatchRes; end; function TZeroWidth.BackMatch(const InputString: UCString; var aPos: integer): integer; var testPos: integer; begin testPos := aPos; if FIsBack then Result := DoResult(FBranch.Match(InputString, testPos)) else Result := DoResult(FBranch.BackMatch(InputString, testPos)); end; function TZeroWidth.Match(const InputString: UCString; var aPos: integer): integer; var testPos: integer; begin testPos := aPos; if FIsBack then Result := DoResult(FBranch.BackMatch(InputString, testPos)) else Result := DoResult(FBranch.Match(InputString, testPos)); end; { TreListNodeBase } procedure TreListNodeBase.Clear; begin FList.Clear; end; constructor TreListNodeBase.Create(AOwner: TreSubExpr); begin inherited; FList := TObjectList.Create; end; destructor TreListNodeBase.Destroy; begin FList.Free; inherited; end; { TreBranchNode } function TreBranchNode.GetClassChar(C: UCChar; Modifiers: Word): UCChar; begin if Assigned(GetCustomCharClassProc) and GetCustomCharClassProc(C) then Result := C else // User defined character class if (MaskModR and Modifiers) = 0 then case C of 'w': Result := 'c'; 'W': Result := 'C'; 'd': Result := 'k'; 'D': Result := 'K'; 'l': Result := 'g'; 'L': Result := 'G'; 's', 'S', 'h', 'H', 'c', 'C', 'g', 'G', 'k', 'K': Result := C; else Result := #0; end else case C of 'w', 'W', 'd', 'D', 'l', 'L', 's', 'S', 'h', 'H', 'c', 'C', 'g', 'G', 'k', 'K': Result := C; else Result := #0; end; end; procedure TreBranchNode.Add(Node: TRENodeBase); begin if FList.Count > 0 then TRENodeBase(FList.Last).Next := Node; FList.Add(Node); end; procedure TreBranchNode.Invert; var i, N: integer; NextLast: TRENodeBase; begin N := FList.Count; if N = 0 then Exit; NextLast := TRENodeBase(FList.Last).Next; for i := 0 to (N div 2) - 1 do FList.Exchange(i, N - 1 - i); for i := 0 to N - 2 do TRENodeBase(FList[i]).Next := TRENodeBase(FList[i + 1]); TRENodeBase(FList.Last).Next := NextLast; end; // All characters are ANSI characters. // UCChar is used only for holding char codes less 255 // This is made to have unified storage of regular expr. nodes procedure TreBranchNode.Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); var Len: integer; sub: TreSubExpr; C: UCChar; function SafeInc(RaiseEx: Boolean = False): AnsiChar; // Increment position to the next significant char begin inc(aPos); // Skip spaces and comments if (Modifiers and MaskModX) <> 0 then while (aPos <= Len) and (IsSpaceChar(Expression[aPos]) or (Expression[aPos] = '#')) do begin if Expression[aPos] = '#' then begin Inc(aPos); while (aPos <= Len) and not IsLineBreakChar(Expression[aPos]) do Inc(aPos); end; Inc(aPos); end; if aPos <= Len then Result := Expression[aPos] else begin Result := #0; if RaiseEx then raise Exception.Create(zreUnexpectedEnd); end; end; function ReadNumber: integer; var strt: integer; begin strt := aPos; while (aPos <= Len) and (Expression[aPos] >= '0') and (Expression[aPos] <= '9') do Inc(aPos); if strt = aPos then raise Exception.Create('Number is expected'); Result := StrToInt(Copy(Expression, strt, aPos - strt)); Dec(aPos); end; // Read repeaters for node procedure ReadRepeaters(Node: TRENodeBase); begin if aPos > Len then Exit;// repeaters are optional case SafeInc of '+': Node.FLoopMax := -1; '?': Node.FLoopMin := 0; '*': begin Node.FLoopMax := -1; Node.FLoopMin := 0; end; '{': begin SafeInc; Node.FLoopMin := ReadNumber; case SafeInc of ',': if SafeInc <> '}' then begin Node.FLoopMax := ReadNumber; if SafeInc <> '}' then raise Exception.Create('There is no closing "}"'); end else Node.FLoopMax := -1; '}': Node.FLoopMax := Node.FLoopMin; else raise Exception.Create('Invalid loop specifier'); end; if (Node.FLoopMax >= 0) and (Node.FLoopMax < Node.FLoopMin) then raise Exception.Create('Loop minimum must be less then loop maximum'); end; else begin Dec(aPos); Exit; end end; if SafeInc = '?' then Node.FNonGreedy := True else begin Node.FNonGreedy := (Modifiers and MaskModG) = 0; Dec(aPos); end; end; procedure AddCharSeq(const C: UCChar); var csNode: TCharSeqNode; begin csNode := TCharSeqNode.Create(Owner); csNode.FIgnoreCase := (Modifiers and MaskModI) <> 0; if csNode.FIgnoreCase then csNode.FChar := UCChar(ecUpCase(AnsiChar(C))) else csNode.FChar := C; Add(csNode); ReadRepeaters(csNode); end; procedure AddSpecNode(const C: UCChar; WithRepeat: Boolean = True); var sn: TSpecCheckNode; begin sn := TSpecCheckNode.Create(Owner); sn.FCheckType := C; Add(sn); if WithRepeat then ReadRepeaters(sn); end; function PickSetChar(cs: TCharSetNode): UCChar; begin Result := UCChar(Expression[aPos]); if Result = '\' then begin Inc(aPos); if aPos > Length(Expression) then Exit; Result := GetEscape(Expression, aPos); if Result = #0 then begin Result := GetClassChar(UCChar(Expression[aPos]), Modifiers); if Result = #0 then Result := UCChar(Expression[aPos]) else begin cs.AddSet(Result); Result := #0; end; end; end; end; procedure ReadCharSet; var cs: TCharSetNode; Cstrt, Cend: UCChar; begin cs := TCharSetNode.Create(Owner); cs.FIgnoreCase := (Modifiers and MaskModI) <> 0; Add(cs); Cstrt := #0; if SafeInc(True) = '^' then cs.FInvert := True else Cstrt := PickSetChar(cs); while SafeInc(True) <> ']' do begin if (Expression[aPos] = '-') and (Cstrt <> #0) then // Add Range if SafeInc = ']' then begin cs.AddChar(Cstrt); Cstrt := '-'; Break; end else begin Cend := PickSetChar(cs); if Cend = #0 then begin cs.AddChar(Cstrt); Cstrt := '-'; end else if Ord(Cend) < Ord(Cstrt) then raise Exception.Create('Invalid set range') else begin // Extended russian support cs.AddRange(Cstrt, Cend); Cstrt := #0; if SafeInc(True) = ']' then Break; end; end; if Cstrt <> #0 then cs.AddChar(Cstrt); Cstrt := PickSetChar(cs); end; if Cstrt <> #0 then cs.AddChar(Cstrt); ReadRepeaters(cs); end; procedure AddRefNode(RefIdx: integer); var rn: TRefNode; begin if TreRootNode(Root).FSubExpr.Count <= RefIdx then raise Exception.Create('Invalid reference'); rn := TRefNode.Create(Owner); rn.FRef := RefIdx; rn.FIgnoreCase := (Modifiers and MaskModI) <> 0; Add(rn); ReadRepeaters(rn); end; procedure AddZeroWidth(IsBack: Boolean); var Negative: Boolean; Branch: TreBranchNode; Node: TZeroWidth; begin case Expression[aPos] of '!': Negative := True; '=': Negative := False; else raise Exception.Create(zreInvalidZeroWidth); end; SafeInc(True); Branch := TreBranchNode.Create(nil); try Branch.Compile(Expression, aPos, Modifiers); Node := TZeroWidth.Create(Owner); Node.FIsBack := IsBack; Node.FNegative := Negative; Node.FBranch := Branch; Add(Node); if IsBack then Branch.Invert; except Branch.Free; end; end; var tp: integer; begin Clear; Len := Length(Expression); Dec(aPos); while aPos <= Len do begin case SafeInc of ')', '|', #0: Exit; // end of branch '(': begin if SafeInc = '?' then begin // Change modifiers case SafeInc(True) of '<': begin SafeInc(True); AddZeroWidth(True); end; '=', '!': AddZeroWidth(False); else begin tp := aPos; repeat // skip comment Inc(aPos); until (aPos > Len) or (Expression[aPos] = ')'); if Expression[tp] <> '#' then Root.Owner.ParseModifiers(Copy(Expression, tp, aPos - tp), Modifiers) end; end; end else begin // sub expression sub := TreSubExpr.Create(Owner); Add(sub); sub.Compile(Expression, aPos, Modifiers); if (aPos > Len) or (Expression[aPos] <> ')') then raise Exception.Create('Do not closed sub expression'); ReadRepeaters(sub); end; end; '[': begin // char set node ReadCharSet; end; '^': if (Modifiers and MaskModM) = 0 then AddSpecNode('A', False) // begin of text else AddSpecNode('^', False); // begin of line '$': if (Modifiers and MaskModM) = 0 then AddSpecNode('Z', False) // end of text else AddSpecNode('$', False); // end of line '.': if (Modifiers and MaskModS) <> 0 then AddSpecNode('.') // all else AddSpecNode(':'); // all without line separators '\': begin Inc(aPos); if aPos > Len then C := '\' else C := GetEscape(Expression, aPos); if C <> #0 then AddCharSeq(C) else begin C := GetClassChar(UCChar(Expression[aPos]), Modifiers); if C <> #0 then AddSpecNode(C) else case Expression[aPos] of 'A', 'Z', 'b', 'B', 'z': AddSpecNode(UCChar(Expression[aPos])); '1'..'9': AddRefNode(Ord(Expression[aPos])-Ord('0')); else AddCharSeq(UCChar(Expression[aPos])); end; end; end; else AddCharSeq(UCChar(Expression[aPos])); // Simple char end; end; end; procedure TreBranchNode.Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); var Len: integer; sub: TreSubExpr; C: UCChar; function SafeInc(RaiseEx: Boolean = False): UCChar; // Increment position to the next significant char begin inc(aPos); // Skip spaces and comments if (Modifiers and MaskModX) <> 0 then while (aPos <= Len) and (IsSpaceChar(Expression[aPos]) or (Expression[aPos] = '#')) do begin if Expression[aPos] = '#' then begin Inc(aPos); while (aPos <= Len) and not IsLineBreakChar(Expression[aPos]) do Inc(aPos); end; Inc(aPos); end; if aPos <= Len then Result := Expression[aPos] else begin Result := #0; if RaiseEx then raise Exception.Create(zreUnexpectedEnd); end; end; function ReadNumber: integer; var strt: integer; begin strt := aPos; while (aPos <= Len) and (Expression[aPos] >= '0') and (Expression[aPos] <= '9') do Inc(aPos); if strt = aPos then raise Exception.Create('Number is expected'); Result := StrToInt(Copy(Expression, strt, aPos - strt)); Dec(aPos); end; // Read repeaters for node procedure ReadRepeaters(Node: TRENodeBase); begin if aPos > Len then Exit;// repeaters are optional case SafeInc of '+': Node.FLoopMax := -1; '?': Node.FLoopMin := 0; '*': begin Node.FLoopMax := -1; Node.FLoopMin := 0; end; '{': begin SafeInc; Node.FLoopMin := ReadNumber; case SafeInc of ',': if SafeInc <> '}' then begin Node.FLoopMax := ReadNumber; if SafeInc <> '}' then raise Exception.Create('There is no closing "}"'); end else Node.FLoopMax := -1; '}': Node.FLoopMax := Node.FLoopMin; else raise Exception.Create('Invalid loop specifier'); end; if (Node.FLoopMax >= 0) and (Node.FLoopMax < Node.FLoopMin) then raise Exception.Create('Loop minimum must be less then loop maximum'); end; else begin Dec(aPos); Exit; end end; if SafeInc = '?' then Node.FNonGreedy := True else begin Node.FNonGreedy := (Modifiers and MaskModG) = 0; Dec(aPos); end; end; procedure AddCharSeq(const C: UCChar); var csNode: TCharSeqNode; begin csNode := TCharSeqNode.Create(Owner); csNode.FIgnoreCase := (Modifiers and MaskModI) <> 0; if csNode.FIgnoreCase then csNode.FChar := ecUpCase(C) else csNode.FChar := C; Add(csNode); ReadRepeaters(csNode); end; procedure AddSpecNode(const C: UCChar; WithRepeat: Boolean = True); var sn: TSpecCheckNode; begin sn := TSpecCheckNode.Create(Owner); sn.FCheckType := C; Add(sn); if WithRepeat then ReadRepeaters(sn); end; function PickSetChar(cs: TCharSetNode): UCChar; begin Result := Expression[aPos]; if Result = '\' then begin Inc(aPos); if aPos > Length(Expression) then Exit; Result := GetEscape(Expression, aPos); if Result = #0 then begin Result := GetClassChar(Expression[aPos], Modifiers); if Result = #0 then Result := Expression[aPos] else begin cs.AddSet(Result); Result := #0; end; end; end; end; procedure ReadCharSet; var cs: TCharSetNode; Cstrt, Cend: UCChar; begin cs := TCharSetNode.Create(Owner); cs.FIgnoreCase := (Modifiers and MaskModI) <> 0; Add(cs); Cstrt := #0; if SafeInc(True) = '^' then cs.FInvert := True else Cstrt := PickSetChar(cs); while SafeInc(True) <> ']' do begin if (Expression[aPos] = '-') and (Cstrt <> #0) then // Add Range if SafeInc = ']' then begin cs.AddChar(Cstrt); Cstrt := '-'; Break; end else begin Cend := PickSetChar(cs); if Cend = #0 then begin cs.AddChar(Cstrt); Cstrt := '-'; end else if Ord(Cend) < Ord(Cstrt) then raise Exception.Create('Invalid set range') else begin // Extended russian support cs.AddRange(Cstrt, Cend); Cstrt := #0; if SafeInc(True) = ']' then Break; end; end; if Cstrt <> #0 then cs.AddChar(Cstrt); Cstrt := PickSetChar(cs); end; if Cstrt <> #0 then cs.AddChar(Cstrt); ReadRepeaters(cs); end; procedure AddRefNode(RefIdx: integer); var rn: TRefNode; begin if TreRootNode(Root).FSubExpr.Count <= RefIdx then raise Exception.Create('Invalid reference'); rn := TRefNode.Create(Owner); rn.FRef := RefIdx; rn.FIgnoreCase := (Modifiers and MaskModI) <> 0; Add(rn); ReadRepeaters(rn); end; procedure AddZeroWidth(IsBack: Boolean); var Negative: Boolean; Branch: TreBranchNode; Node: TZeroWidth; begin case Expression[aPos] of '!': Negative := True; '=': Negative := False; else raise Exception.Create(zreInvalidZeroWidth); end; SafeInc(True); Branch := TreBranchNode.Create(nil); try Branch.Compile(Expression, aPos, Modifiers); Node := TZeroWidth.Create(Owner); Node.FIsBack := IsBack; Node.FNegative := Negative; Node.FBranch := Branch; Add(Node); if IsBack then Branch.Invert; except Branch.Free; end; end; var tp: integer; begin Clear; Len := Length(Expression); Dec(aPos); while aPos <= Len do begin case SafeInc of ')', '|', #0: Exit; // end of branch '(': begin if SafeInc = '?' then begin // Change modifiers case SafeInc(True) of '<': begin SafeInc(True); AddZeroWidth(True); end; '=', '!': AddZeroWidth(False); else begin tp := aPos; repeat // skip comment Inc(aPos); until (aPos > Len) or (Expression[aPos] = ')'); if Expression[tp] <> '#' then Root.Owner.ParseModifiers(Copy(Expression, tp, aPos - tp), Modifiers) end; end; end else begin // sub expression sub := TreSubExpr.Create(Owner); Add(sub); sub.Compile(Expression, aPos, Modifiers); if (aPos > Len) or (Expression[aPos] <> ')') then raise Exception.Create('Do not closed sub expression'); ReadRepeaters(sub); end; end; '[': begin // char set node ReadCharSet; end; '^': if (Modifiers and MaskModM) = 0 then AddSpecNode('A', False) // begin of text else AddSpecNode('^', False); // begin of line '$': if (Modifiers and MaskModM) = 0 then AddSpecNode('Z', False) // end of text else AddSpecNode('$', False); // end of line '.': if (Modifiers and MaskModS) <> 0 then AddSpecNode('.') // all else AddSpecNode(':'); // all without line separators '\': begin Inc(aPos); if aPos > Len then C := '\' else C := GetEscape(Expression, aPos); if C <> #0 then AddCharSeq(C) else begin C := GetClassChar(Expression[aPos], Modifiers); if C <> #0 then AddSpecNode(C) else case Expression[aPos] of 'A', 'Z', 'b', 'B', 'z': AddSpecNode(Expression[aPos]); '1'..'9': AddRefNode(Ord(Expression[aPos])-Ord('0')); else AddCharSeq(Expression[aPos]); end; end; end; else AddCharSeq(Expression[aPos]); // Simple char end; end; end; // Main mtaching routine (recursive) // Returns: // 0 - does not match // 1 - match the Node // 2 - match The Node and all next nodes function MatchNode(Node: TRENodeBase; const InputString: UCString; var aPos: integer): integer; overload; var save, k, sv, LastSucc, total, Success: integer; IsBrEnd: Boolean; begin if Node = nil then begin Result := 2; Exit; end; Result := 0; // required minimum repeat save := aPos; Success := 0; for k := 1 to node.FLoopMin do begin Success := Node.Match(InputString, aPos); if Success = 0 then begin aPos := save; {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, 0])); {$ENDIF} Exit; end; end; k := node.FLoopMin - 1; // k := 0; LastSucc := 0; total := 0; IsBrEnd := Assigned(Node.Owner) and ((Node.Next = nil) or (Node.Next.Owner <> Node.Owner)); repeat Inc(k); sv := aPos; if IsBrEnd then Node.Owner.FEnd := aPos; if Node is TreSubExpr then begin if Success <> 2 then Success := MatchNode(GetAbsoluteNext(Node), InputString, aPos); end else if Node.Next <> nil then Success := MatchNode(Node.Next, InputString, aPos) else if IsBrEnd then Success := MatchNode(GetAbsoluteNext(Node.Owner), InputString, aPos) else Success := 2; if Success = 2 then // success all next nodes begin {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Success at %d; Node: %d; NextRes: %s',[sv, Node.FNodeID, IntToStr(Success)])); {$ENDIF} total := aPos; LastSucc := sv; aPos := LastSucc; Result := Success; if node.FNonGreedy then Break; // for non GREEDY mode end else if IsBrEnd then begin total := sv; Result := 1; end else if Success = 1 then begin aPos := sv; total := sv; Result := 1; if node.FNonGreedy then Break; // for non GREEDY mode end; if (node.FLoopMax > 0) and (k >= node.FLoopMax) then // check max limit begin if not IsBrEnd then //v2.36 Result := Success; //v2.33 Break; end; sv := aPos; Success := Node.Match(InputString, aPos); until (Success = 0) or (aPos = sv); // if (node.FLoopMin > 0) and (k < node.FLoopMin) then // Result := 0; {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, integer(Result)])); {$ENDIF} if Result > 0 then begin aPos := total; // Save sub-expression result if IsBrEnd and (LastSucc > 0) then Node.Owner.FEnd := LastSucc; end else begin aPos := save; if IsBrEnd then Node.Owner.FEnd := 0; end; end; // Main mtaching routine (recursive) function BackMatchNode(Node: TRENodeBase; const InputString: UCString; var aPos: integer): integer; overload; var save, k, sv, LastSucc, total, Success: integer; IsBrEnd: Boolean; begin if Node = nil then begin Result := 2; Exit; end; Result := 0; // required minimum repeat save := aPos; Success := 0; for k := 1 to node.FLoopMin do begin Success := Node.BackMatch(InputString, aPos); if Success = 0 then begin aPos := save; {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, 0])); {$ENDIF} Exit; end; end; k := node.FLoopMin - 1; LastSucc := 0; total := 0; IsBrEnd := Assigned(Node.Owner) and ((Node.Next = nil) or (Node.Next.Owner <> Node.Owner)); repeat Inc(k); sv := aPos; if IsBrEnd then Node.Owner.FStart := aPos; if Node is TreSubExpr then begin if Success <> 2 then Success := BackMatchNode(GetAbsoluteNext(Node), InputString, aPos); end else if Node.Next <> nil then Success := BackMatchNode(Node.Next, InputString, aPos) else if IsBrEnd then Success := BackMatchNode(GetAbsoluteNext(Node.Owner), InputString, aPos) else Success := 2; if Success = 2 then begin {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Success at %d; Node: %d; NextRes: %s',[sv, Node.FNodeID, IntToStr(Success)])); {$ENDIF} total := aPos; LastSucc := sv; aPos := LastSucc; Result := Success; if node.FNonGreedy then Break; // for non GREEDY mode end else if IsBrEnd then begin total := sv; Result := 1; end else if Success = 1 then begin aPos := sv; total := sv; Result := 1; end; if (node.FLoopMax > 0) and (k >= node.FLoopMax) then // check max limit begin if not IsBrEnd then //v2.36 Result := Success; //v2.33 Break; end; sv := aPos; Success := Node.BackMatch(InputString, aPos); until (Success = 0) or (aPos = sv); {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Node: %d; Position: %d; Result: %d',[Node.FNodeID, aPos, integer(Result)])); {$ENDIF} if Result > 0 then begin aPos := total; // Save sub-expression result if IsBrEnd and (LastSucc > 0) then Node.Owner.FStart := LastSucc; end else begin aPos := save; if IsBrEnd then Node.Owner.FStart := 0; end; end; function TreBranchNode.Match(const InputString: UCString; var aPos: integer): integer; begin {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Branch<: %d; Position: %d',[FNodeID, aPos])); {$ENDIF} if FList.Count = 0 then Result := 1 else Result := MatchNode(TRENodeBase(FList.First), InputString, aPos); {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Branch>: %d; Position: %d; Result: %d',[FNodeID, aPos, integer(Result)])); {$ENDIF} end; function TreBranchNode.BackMatch(const InputString: UCString; var aPos: integer): integer; begin {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Branch<: %d; Position: %d',[FNodeID, aPos])); {$ENDIF} if FList.Count = 0 then Result := 1 else Result := BackMatchNode(TRENodeBase(FList.First), InputString, aPos); {$IFDEF RE_DEBUG} if Assigned(REDebugOnMatchProc) then REDebugOnMatchProc(Format('Branch>: %d; Position: %d; Result: %d',[FNodeID, aPos, integer(Result)])); {$ENDIF} end; { TreSubExpr } procedure TreSubExpr.Compile(const Expression: AnsiString; var aPos: integer; Modifiers: Word); var Br: TreBranchNode; begin Dec(aPos); repeat Inc(aPos); Br := TreBranchNode.Create(Self); FList.Add(Br); Br.Compile(Expression, aPos, Modifiers); if Br.FList.Count = 0 then FList.Remove(Br); until (aPos > Length(Expression)) or (Expression[aPos] = ')'); end; procedure TreSubExpr.Compile(const Expression: UCString; var aPos: integer; Modifiers: Word); var Br: TreBranchNode; begin Dec(aPos); repeat Inc(aPos); Br := TreBranchNode.Create(Self); FList.Add(Br); Br.Compile(Expression, aPos, Modifiers); if Br.FList.Count = 0 then FList.Remove(Br); until (aPos > Length(Expression)) or (Expression[aPos] = ')'); end; constructor TreSubExpr.Create(AOwner: TreSubExpr); begin inherited; if Owner <> nil then TreRootNode(Root).FSubExpr.Add(Self); end; function TreSubExpr.Match(const InputString: UCString; var aPos: integer): integer; var i: integer; OldEnd, OldStart, CurRes, svPos: integer; begin OldEnd := FEnd; OldStart := FStart; FStart := aPos; FEnd := 0; Result := 0; svPos := aPos; for i := 0 to FList.Count - 1 do begin CurRes := TreBranchNode(FList[i]).Match(InputString, aPos); if FNonGreedy then begin if CurRes > 0 then begin OldEnd := FEnd; OldStart := FStart; Result := CurRes; aPos := svPos; Break; end; end else if CurRes > Result then begin OldEnd := FEnd; OldStart := FStart; Result := CurRes; aPos := svPos; if Result = 2 then Break; end; end; FEnd := OldEnd; FStart := OldStart; if (FEnd > 0) and (Result > 0) then aPos := FEnd; end; function TreSubExpr.BackMatch(const InputString: UCString; var aPos: integer): integer; var i: integer; OldEnd, OldStart, CurRes, svPos: integer; begin OldEnd := FEnd; OldStart := FStart; FStart := 0; FEnd := aPos; Result := 0; svPos := aPos; for i := 0 to FList.Count - 1 do begin CurRes := TreBranchNode(FList[i]).BackMatch(InputString, aPos); if FNonGreedy then begin if CurRes > 0 then begin OldEnd := FEnd; OldStart := FStart; Result := CurRes; aPos := svPos; Break; end; end else if CurRes > Result then begin OldEnd := FEnd; OldStart := FStart; Result := CurRes; aPos := svPos; if Result = 2 then Break; end; end; FEnd := OldEnd; FStart := OldStart; if (FStart > 0) and (Result > 0) then aPos := FStart; end; { TreRootNode } procedure TreRootNode.Clear; begin inherited; FSubExpr.Clear; FSubExpr.Add(Self); end; constructor TreRootNode.Create(AOwner: TecRegExpr); begin inherited Create(nil); FOwner := AOwner; FSubExpr := TList.Create; FSubExpr.Add(Self); end; destructor TreRootNode.Destroy; begin FSubExpr.Free; inherited; end; function TreRootNode.MatchStr(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean; var i: integer; begin for i := 0 to FSubExpr.Count - 1 do begin TreSubExpr(FSubExpr[i]).FStart := -1; TreSubExpr(FSubExpr[i]).FEnd := -1; end; if Back then Result := BackMatchNode(Self, InputString, aPos) <> 0 else Result := MatchNode(Self, InputString, aPos) <> 0; end; // ============================================================================= // Application Level // ============================================================================= { TecRegExpr } constructor TecRegExpr.Create; begin inherited; FModifiers := DefaultModifiers; end; destructor TecRegExpr.Destroy; begin FreeAndNil(FProgRoot); inherited; end; procedure TecRegExpr.ClearRoot; begin FreeAndNil(FProgRoot); FMatchOK := False; end; function TecRegExpr.IsEmpty: Boolean; begin Result := not Assigned(FProgRoot) or (TreRootNode(FProgRoot).FList.Count = 0); end; procedure TecRegExpr.Compile(const AExpression: AnsiString); var Pos: integer; begin {$IFDEF RE_DEBUG} LastNodeID := 0; {$ENDIF} FMatchOK := False; if not Assigned(FProgRoot) then FProgRoot := TreRootNode.Create(Self) else TreRootNode(FProgRoot).Clear; FUnicodeCompiled := False; Pos := 1; try if AExpression <> '' then TreRootNode(FProgRoot).Compile(AExpression, Pos, FModifiers) except ClearRoot; raise; end; end; procedure TecRegExpr.Compile(const AExpression: UCString); var Pos: integer; begin {$IFDEF RE_DEBUG} LastNodeID := 0; {$ENDIF} FMatchOK := False; if not Assigned(FProgRoot) then FProgRoot := TreRootNode.Create(Self) else TreRootNode(FProgRoot).Clear; Pos := 1; try if AExpression <> '' then TreRootNode(FProgRoot).Compile(AExpression, Pos, FModifiers); except ClearRoot; raise; end; FUnicodeCompiled := True; end; function TecRegExpr.Compile: Boolean; begin Result := Compile(True); end; function TecRegExpr.Compile(AsUnicode: Boolean): Boolean; begin try if IsEmpty or (AsUnicode xor FUnicodeCompiled) then Compile(FExpression); except end; Result := not IsEmpty; end; function TecRegExpr.GetIsInvalid: Boolean; begin Result := not Compile(False); end; function TecRegExpr.GetModifier(const Index: Integer): boolean; begin Result := (FModifiers and Index) <> 0; end; function TecRegExpr.GetModifierStr: ecString; const ModLet: ecString = 'irsgmx'; var s1, s2: string; i: integer; begin s1 := ''; s2 := ''; for i := 0 to 5 do if (FModifiers and (1 shl i)) <> 0 then s1 := s1 + ModLet[i + 1] else s2 := s2 + ModLet[i + 1]; Result := '(?' + s1; if s2 <> '' then Result := Result + '-' + s2; Result := Result + ')'; end; function TecRegExpr.Match(const InputString: UCString; var aPos: integer; Back: Boolean): Boolean; begin Result := Compile(True); // ensure compiling and validity if Result then begin if aPos < 1 then aPos := 1; Result := TreRootNode(FProgRoot).MatchStr(InputString, aPos, Back); FMatchOK := Result; end; end; function TecRegExpr.MatchLength(const InputString: AnsiString; aPos: integer; Back: Boolean): integer; begin Result := aPos; if Match(InputString, aPos, Back) then begin if Back then Result := Result - aPos else Result := aPos - Result; end else Result := 0; end; function TecRegExpr.MatchLength(const InputString: UCString; aPos: integer; Back: Boolean): integer; begin Result := aPos; if Match(InputString, aPos, Back) then begin if Back then Result := Result - aPos else Result := aPos - Result; end else Result := 0; end; //AT fix for SetExpression function _MultilineTextToOneline(const AText: string): string; var L: TStringList; S: string; i, j: integer; begin L:= TStringList.Create; try L.TextLineBreakStyle:= tlbsLF; //force LF L.Text:= AText; //delete comments "#nnnnnnnnnnn" for i:= 0 to L.Count-1 do begin S:= L[i]; for j:= 1 to Length(S) do if (S[j]='#') and ((j=1) or (S[j-1]<>'\')) then begin Delete(S, j, Maxint); L[i]:= S; Break end; end; Result:= Trim(L.Text); Result:= StringReplace(Result, #10, ' ', [rfReplaceAll]); finally FreeAndNil(L); end; end; procedure TecRegExpr.SetExpression(const Value: ecString); begin FExpression:= _MultilineTextToOneline(Value); //AT //if Pos('#', FExpression)>0 then // showmessage('regex'#13+FExpression); ClearRoot; end; procedure TecRegExpr.SetModifier(const Index: Integer; const Value: boolean); begin if Value then FModifiers := FModifiers or Index else FModifiers := FModifiers and not Index; ClearRoot; end; procedure TecRegExpr.SetModifiers(const Value: Word); begin FModifiers := Value; ClearRoot; end; procedure TecRegExpr.SetModifierStr(const Value: ecString); begin if (Length(Value) >= 3) and (Copy(Value, 1, 2) = '(?') then ParseModifiers(AnsiString(Copy(Value, 3, Length(Value) - 3)), FModifiers); end; function TecRegExpr.GetMatchLen(Idx: integer): integer; begin Result := -1; if FMatchOK then with TreRootNode(FProgRoot) do if (idx < FSubExpr.Count) then with TreSubExpr(FSubExpr[Idx]) do if (FStart <> -1) and (FEnd <> -1) then Result := FEnd - FStart; end; function TecRegExpr.GetMatchPos(Idx: integer): integer; begin Result := -1; if FMatchOK then with TreRootNode(FProgRoot) do if (idx < FSubExpr.Count) then with TreSubExpr(FSubExpr[Idx]) do Result := FStart; end; function TecRegExpr.GetSubExprMatchCount: integer; var i: integer; begin Result := -1; if FMatchOK then with TreRootNode(FProgRoot) do for i := 0 to FSubExpr.Count - 1 do with TreSubExpr(FSubExpr[i]) do if (FStart <> -1) and (FEnd <> -1) then Inc(Result); end; function TecRegExpr.GetMatch(const InputString: AnsiString; SubIdx: integer): AnsiString; begin Result := ''; if FMatchOK then with TreRootNode(FProgRoot) do if (SubIdx < FSubExpr.Count) then with TreSubExpr(FSubExpr[SubIdx]) do if (FStart <> -1) and (FEnd <> -1) then Result := Copy(InputString, FStart, FEnd - FStart); end; function TecRegExpr.GetMatch(const InputString: UCString; SubIdx: integer): UCString; begin Result := ''; if FMatchOK then with TreRootNode(FProgRoot) do if (SubIdx < FSubExpr.Count) then with TreSubExpr(FSubExpr[SubIdx]) do if (FStart <> -1) and (FEnd <> -1) then Result := Copy(InputString, FStart, FEnd - FStart); end; function TecRegExpr.Substitute(const InputString, ATemplate: ecString): ecString; var i: integer; C: ecChar; begin // if not FMatchOK then // raise Exception.Create('No matched string'); Result := ''; i := 1; while i <= Length(ATemplate) do begin if (ATemplate[i] = '\') and (i < Length(ATemplate)) then begin inc(i); if IsDigitChar(ATemplate[i]) then Result := Result + GetMatch(InputString, StrToInt(ATemplate[i])) else begin C := ecChar(Ord(GetEscape(ATemplate, i))); if C = #0 then C := ATemplate[i]; Result := Result + C; end; end else Result := Result + ATemplate[i]; inc(i); end; end; procedure TecRegExpr.Assign(Source: TPersistent); begin if Source is TecRegExpr then with Source as TecRegExpr do begin Self.Expression := Expression; Self.ModifierMask := ModifierMask; end else inherited; end; procedure TecRegExpr.SetCodePage(const Value: Cardinal); begin if FCodePage <> Value then begin FCodePage := Value; ClearRoot; end; end; procedure TecRegExpr.ParseModifiers(const S: AnsiString; var Modifiers: Word); var IsOn : boolean; i: integer; procedure SetModif(m: integer); begin if (m and FModifiersStatic) = 0 then if IsOn then Modifiers := Modifiers or m else Modifiers := Modifiers and not m; end; begin IsOn := true; for i := 1 to Length(S) do case S[i] of '-': IsOn := false; 'i','I': SetModif(MaskModI); 'r','R': SetModif(MaskModR); 's','S': SetModif(MaskModS); 'g','G': SetModif(MaskModG); 'm','M': SetModif(MaskModM); 'x','X': SetModif(MaskModX); else raise Exception.Create(zreUnexpectedModifier); end; end; // ============================================================================= // DEBUGGER // ============================================================================= {$IFDEF RE_DEBUG} // Fill tree with compiled nodes {debug purpose} procedure REDebugCompiledBuildTree(RE: TecRegExpr; TV: TTreeView); function GetNodeCaption(Node: TRENodeBase): string; begin if Node.ClassType = TCharSeqNode then Result := ecChar(TCharSeqNode(Node).FChar) else if Node.ClassType = TCharSetNode then begin Result := '[ ... ]'; end else if Node.ClassType = TSpecCheckNode then Result := '! '+ TSpecCheckNode(Node).FCheckType + ' !' else if Node.ClassType = TZeroWidth then Result := '0' else Result := '<' + Node.ClassName + '>'; Result := IntToStr(Node.FNodeId) + ' ' + Result + Format(' {%d, %d}', [Node.FLoopMin, Node.FLoopMax]); end; procedure AddNode(Node: TRENodeBase; Prn: TTreeNode); var tn: TTreeNode; i: integer; begin tn := TV.Items.AddChild(Prn, GetNodeCaption(Node)); if Node is TreListNodeBase then for i := 0 to TreListNodeBase(Node).FList.Count - 1 do AddNode(TRENodeBase(TreListNodeBase(Node).FList[i]), tn); if Node is TZeroWidth then AddNode(TZeroWidth(Node).FBranch, tn); end; begin TV.Items.Clear; AddNode(TreRootNode(RE.FProgRoot), nil); end; {$ENDIF} end.