{ *************************************************************************** }
{                                                                             }
{ EControl Syntax Editor SDK                                                  }
{                                                                             }
{ Copyright (c) 2004 - 2015 EControl Ltd., Zaharov Michael                    }
{ www.econtrol.ru                                                             }
{ support@econtrol.ru                                                         }
{                                                                             }
{ *************************************************************************** }

{$mode delphi}

unit ecSyntGramma;

interface

uses
  Classes, Dialogs,
  ecStrUtils, ATStringProc_TextBuffer;

type
  TParserRuleItem = class;
  TParserRule = class;
  TParserRuleBranch = class;

  // Kind of a single item inside a grammar-rule branch
  TParserItemType = (itTerminal,       // "aaa"  - exact-case terminal
                     itTerminalNoCase, // 'aaa'  - case-insensitive terminal
                     itTokenRule,      // <aaa>  - reference to a lexer token rule
                     itParserRule);    // aaa    - reference to another parser rule

  { Abstract source of the token stream that rules are matched against }
  TTokenHolder = class
  protected
    function GetTokenCount: integer; virtual; abstract;
    function GetTokenType(Index: integer): integer; virtual; abstract;
    function GetTokenStr(Index: integer): ecString; virtual; abstract;
  end;

  { Abstract node of the parse tree produced by TGrammaAnalyzer.ParseRule }
  TParserNode = class
  protected
    function GetCount: integer; virtual; abstract;
    function GetNodes(Index: integer): TParserNode; virtual; abstract;
    function GetFirstToken: integer; virtual; abstract;
    function GetLastToken: integer; virtual; abstract;
  public
    property Count: integer read GetCount;
    property Nodes[Index: integer]: TParserNode read GetNodes; default;
    property FirstToken: integer read GetFirstToken;
    property LastToken: integer read GetLastToken;
  end;

  { Inner parse-tree node: one matched branch with child nodes.
    Children are appended while the parser consumes tokens from right to
    left, so they are stored in reverse token order. }
  TParserBranchNode = class(TParserNode)
  private
    FNodes: TList;            // lazily created owning list of child nodes
    FRule: TParserRuleBranch; // the branch this node was produced from
  protected
    function GetCount: integer; override;
    function GetNodes(Index: integer): TParserNode; override;
    function GetFirstToken: integer; override;
    function GetLastToken: integer; override;
  public
    constructor Create(ARule: TParserRuleBranch);
    destructor Destroy; override;
    procedure Add(Node: TParserNode);
    property Rule: TParserRuleBranch read FRule;
  end;

  { Leaf parse-tree node: a single matched token }
  TParserTermNode = class(TParserNode)
  private
    FRule: TParserRuleItem;
    FTokenIndex: integer;
  protected
    function GetCount: integer; override;
    function GetNodes(Index: integer): TParserNode; override;
    function GetFirstToken: integer; override;
    function GetLastToken: integer; override;
  public
    constructor Create(ARule: TParserRuleItem; ATokenIndex: integer);
    property Rule: TParserRuleItem read FRule;
  end;

  { One item of a branch: terminal, token-rule or (sub)rule reference,
    plus its repeat range (RepMax = -1 means unbounded) }
  TParserRuleItem = class
  private
    FItemType: TParserItemType;
    FTerminal: string;
    FTokenType: integer;
    FParserRule: TParserRule;
    FBranch: TParserRuleBranch;
    FRepMin: integer;
    FRepMax: integer;
    FOwnRule: Boolean; // True when FParserRule is an inline sub-rule owned by this item
  public
    constructor Create;
    destructor Destroy; override;
    function IsValid: Boolean;
    property ItemType: TParserItemType read FItemType;
    property Terminal: string read FTerminal;
    property TokenType: integer read FTokenType;
    property ParserRule: TParserRule read FParserRule;
    property Branch: TParserRuleBranch read FBranch;
    property RepMin: integer read FRepMin;
    property RepMax: integer read FRepMax;
    property IsSubRule: Boolean read FOwnRule;
  end;

  { One alternative of a rule: an ordered sequence of items }
  TParserRuleBranch = class
  private
    FItems: TList; // owning list of TParserRuleItem
    FRule: TParserRule;
    function GetCount: integer;
    function GetItems(Index: integer): TParserRuleItem;
  public
    constructor Create;
    destructor Destroy; override;
    function IsValid: Boolean;
    property Count: integer read GetCount;
    property Items[Index: integer]: TParserRuleItem read GetItems;
    property Rule: TParserRule read FRule;
  end;

  { A named grammar rule: a set of alternative branches }
  TParserRule = class
  private
    FBranches: TList; // owning list of TParserRuleBranch
    FName: string;
    FIndex: integer;  // position inside TGrammaAnalyzer.FGrammaRules
    function GetCount: integer;
    function GetBranches(Index: integer): TParserRuleBranch;
  public
    constructor Create;
    destructor Destroy; override;
    function IsValid: Boolean;
    property Count: integer read GetCount;
    property Branches[Index: integer]: TParserRuleBranch read GetBranches;
    property Name: string read FName;
  end;

  { Compiles a textual grammar definition into rule objects and matches
    those rules against a TTokenHolder token stream }
  TGrammaAnalyzer = class(TPersistent)
  private
    FGrammaRules: TList;          // owning list of compiled TParserRule
    FGrammaDefs: TATStringBuffer; // grammar source text
    FRoot: TParserRule;           // rule named 'Root', if present
    FSkipRule: TParserRule;       // rule named 'Skip', if present
    FOnChange: TNotifyEvent;
    function GetGrammaCount: integer;
    function GetGrammaRules(Index: integer): TParserRule;
    function GetGramma: ecString;
    procedure SetGramma(const Value: ecString);
  protected
    function GetGrammaLines: TATStringBuffer;
    procedure Changed; dynamic;
  public
    constructor Create;
    destructor Destroy; override;
    procedure Assign(Source: TPersistent); override;
    procedure Clear;
    function ParserRuleByName(const AName: string): TParserRule;
    function IndexOfRule(Rule: TParserRule): integer;
    function CompileGramma(TokenNames: TStrings): Boolean;
    function ParseRule(FromIndex: integer; Rule: TParserRule; Tags: TTokenHolder): TParserNode;
    function TestRule(FromIndex: integer; Rule: TParserRule; Tags: TTokenHolder): integer;
    property GrammaCount: integer read GetGrammaCount;
    property GrammaRules[Index: integer]: TParserRule read GetGrammaRules; default;
    property OnChange: TNotifyEvent read FOnChange write FOnChange;
  published
    property Gramma: ecString read GetGramma write SetGramma;
  end;

  TTraceParserProc = procedure(Sender: TObject; CurIdx: integer; const TraceStr: string) of object;

var
  // Optional global hook for tracing the parser (unused by default; the
  // call sites in RuleProcess are commented out)
  TraceParserProc: TTraceParserProc;

implementation

uses
  SysUtils, Contnrs, ecSyntAnal;

{ TParserRuleItem }

constructor TParserRuleItem.Create;
begin
  inherited Create;
  FTokenType := -1; // -1 = token type not resolved yet
  FRepMin := 1;     // default: exactly one occurrence
  FRepMax := 1;
  FOwnRule := False;
end;

destructor TParserRuleItem.Destroy;
begin
  // Only inline sub-rules are owned by the item; named rules belong to
  // the analyzer's rule list.
  if FOwnRule then
    FParserRule.Free;
  inherited;
end;

// An item is valid when its type-specific payload is filled in.
function TParserRuleItem.IsValid: Boolean;
begin
  Result := False;
  case FItemType of
    itTerminal,
    itTerminalNoCase:
      Result := FTerminal <> '';
    itTokenRule:
      Result := FTokenType <> -1;
    itParserRule:
      Result := FParserRule <> nil;
  end;
end;

{ TParserRuleBranch }

constructor TParserRuleBranch.Create;
begin
  inherited Create;
  FItems := TObjectList.Create; // owns its items
end;

destructor TParserRuleBranch.Destroy;
begin
  FItems.Free;
  inherited;
end;

function TParserRuleBranch.GetCount: integer;
begin
  Result := FItems.Count;
end;

function TParserRuleBranch.GetItems(Index: integer): TParserRuleItem;
begin
  Result := TParserRuleItem(FItems[Index]);
end;

// A branch is valid when every item in it is valid (an empty branch is valid).
function TParserRuleBranch.IsValid: Boolean;
var
  Idx: integer;
begin
  Result := True;
  for Idx := 0 to FItems.Count - 1 do
    if not Items[Idx].IsValid then
    begin
      Result := False;
      Exit;
    end;
end;

{ TParserRule }

constructor TParserRule.Create;
begin
  inherited Create;
  FBranches := TObjectList.Create; // owns its branches
end;

destructor TParserRule.Destroy;
begin
  FBranches.Free;
  inherited;
end;

function TParserRule.GetBranches(Index: integer): TParserRuleBranch;
begin
  Result := TParserRuleBranch(FBranches[Index]);
end;

function TParserRule.GetCount: integer;
begin
  Result := FBranches.Count;
end;

// A rule is valid when all branches are valid and it is non-degenerate:
// either several branches, or a single non-empty branch.
function TParserRule.IsValid: Boolean;
var
  Idx: integer;
begin
  Result := False;
  for Idx := 0 to Count - 1 do
    if not Branches[Idx].IsValid then
      Exit;
  if Count > 1 then
    Result := True
  else
    Result := (Count = 1) and (Branches[0].Count > 0);
end;

{ TParserBranchNode }

constructor TParserBranchNode.Create(ARule: TParserRuleBranch);
begin
  inherited Create;
  FRule := ARule;
end;

destructor TParserBranchNode.Destroy;
begin
  FNodes.Free; // TObject.Free is nil-safe, so no guard is needed
  inherited;
end;

// Appends a child node; the owning list is created on first use.
procedure TParserBranchNode.Add(Node: TParserNode);
begin
  if not Assigned(FNodes) then
    FNodes := TObjectList.Create;
  FNodes.Add(Node);
end;

function TParserBranchNode.GetCount: integer;
begin
  Result := 0;
  if Assigned(FNodes) then
    Result := FNodes.Count;
end;

// Children are stored in reverse token order (the parser matches tokens
// right-to-left), so the earliest token lives in the LAST child.
function TParserBranchNode.GetFirstToken: integer;
begin
  if Count = 0 then
    Result := -1
  else
    Result := Nodes[Count - 1].FirstToken;
end;

// ...and the latest token lives in the FIRST child (see GetFirstToken).
function TParserBranchNode.GetLastToken: integer;
begin
  if Count = 0 then
    Result := -1
  else
    Result := Nodes[0].LastToken;
end;

function TParserBranchNode.GetNodes(Index: integer): TParserNode;
begin
  Result := nil;
  if Assigned(FNodes) then
    Result := TParserNode(FNodes[Index]);
end;

{ TParserTermNode }

constructor TParserTermNode.Create(ARule: TParserRuleItem; ATokenIndex: integer);
begin
  inherited Create;
  FRule := ARule;
  FTokenIndex := ATokenIndex;
end;

// A leaf has no children.
function TParserTermNode.GetCount: integer;
begin
  Result := 0;
end;

function TParserTermNode.GetFirstToken: integer;
begin
  Result := FTokenIndex;
end;

function TParserTermNode.GetLastToken: integer;
begin
  Result := FTokenIndex;
end;

function TParserTermNode.GetNodes(Index: integer): TParserNode;
begin
  Result := nil;
end;

{ TGrammaAnalyzer }
constructor TGrammaAnalyzer.Create;
begin
  inherited;
  // TObjectList owns the compiled rules; TATStringBuffer holds the grammar text.
  FGrammaRules := TObjectList.Create;
  FGrammaDefs := TATStringBuffer.Create;
end;

destructor TGrammaAnalyzer.Destroy;
begin
  Clear;
  FGrammaDefs.Free;
  FGrammaRules.Free;
  inherited;
end;

// Discards the compiled rules and the grammar source text.
procedure TGrammaAnalyzer.Clear;
begin
  FGrammaRules.Clear;
  FGrammaDefs.Clear;
  FRoot := nil;
  FSkipRule := nil;
end;

// Copies the grammar text from another analyzer; the Gramma setter
// stores the text and fires Changed.
procedure TGrammaAnalyzer.Assign(Source: TPersistent);
begin
  if Source is TGrammaAnalyzer then
    Gramma := (Source as TGrammaAnalyzer).Gramma;
end;

// Case-insensitive lookup of a compiled rule by name; nil when not found
// or when AName is empty.
function TGrammaAnalyzer.ParserRuleByName(const AName: string): TParserRule;
var
  i: integer;
begin
  if AName <> '' then
    for i := 0 to FGrammaRules.Count - 1 do
      if SameText(AName, GrammaRules[i].Name) then
      begin
        Result := GrammaRules[i];
        Exit;
      end;
  Result := nil;
end;

function TGrammaAnalyzer.GetGrammaCount: integer;
begin
  Result := FGrammaRules.Count;
end;

function TGrammaAnalyzer.GetGrammaRules(Index: integer): TParserRule;
begin
  Result := TParserRule(FGrammaRules[Index]);
end;

function TGrammaAnalyzer.IndexOfRule(Rule: TParserRule): integer;
begin
  Result := FGrammaRules.IndexOf(Rule);
end;

function TGrammaAnalyzer.GetGramma: ecString;
begin
  // Reads the buffer's text field directly (FText is accessible here;
  // presumably no public getter existed when this was written).
  Result := FGrammaDefs.FText;
end;

procedure TGrammaAnalyzer.SetGramma(const Value: ecString);
begin
  FGrammaDefs.SetupSlow(Value);
  Changed;
end;

function TGrammaAnalyzer.GetGrammaLines: TATStringBuffer;
begin
  Result := FGrammaDefs;
end;

// Compiling Gramma rules
//
// Compiles the grammar text into FGrammaRules. A temporary lexer tokenizes
// the grammar source; a recursive-descent pass then builds the rule objects
// and a final pass links token/rule references. TokenNames maps <token>
// names to token-type indices. After compilation, rules named 'Root' and
// 'Skip' (if any) are cached in FRoot/FSkipRule.
// NOTE: the function always returns True; failures simply yield fewer rules.
function TGrammaAnalyzer.CompileGramma(TokenNames: TStrings): Boolean;
var
  Lex: TecSyntAnalyzer;
  Res: TecClientSyntAnalyzer;
  Cur, i: integer; // Cur = index of the current grammar token in Res
  Rule: TParserRule;

  // Registers one regex token rule on the temporary lexer.
  procedure AddTokenRule(ATokenType: integer; Expr: string);
  var
    TokenRule: TecTokenRule;
  begin
    TokenRule := Lex.TokenRules.Add;
    TokenRule.TokenType := ATokenType;
    TokenRule.Expression := Expr;
  end;

  // True while Cur still points at a token.
  function ValidCur: Boolean;
  begin
    Result := Cur < Res.TagCount;
  end;

  // Advances Cur past comment tokens (token type 0).
  procedure SkipComments;
  begin
    while ValidCur do
      if Res.Tags[Cur].TokenType = 0 then
        Inc(Cur)
      else
        Exit;
  end;

  // Consumes an optional repeater (+ ? *) after an item and sets its
  // repeat range; RepMax = -1 means unbounded.
  procedure ReadRepeater(RuleItem: TParserRuleItem);
  var
    s: string;
  begin
    if not ValidCur then Exit;
    if Res.Tags[Cur].TokenType = 8 then
    begin
      s := Res.TagStr[Cur];
      if s = '+' then
        RuleItem.FRepMax := -1
      else if s = '?' then
        RuleItem.FRepMin := 0
      else if s = '*' then
      begin
        RuleItem.FRepMin := 0;
        RuleItem.FRepMax := -1;
      end;
      Inc(Cur);
    end;
  end;

  function ExtractRule: TParserRule; forward;

  // Parses one branch item: a terminal ("..." or '...'), a <token> rule,
  // a rule-name reference, or a parenthesized inline sub-rule.
  // Returns nil (without consuming the current token) when the current
  // token cannot start an item.
  function ExtractItem: TParserRuleItem;
  var
    t: TParserItemType;
    s: string;
    R: TParserRule;
  begin
    Result := nil;
    SkipComments;
    if not ValidCur then Exit;
    R := nil;
    case Res.Tags[Cur].TokenType of
      1: t := itTerminal;
      2: t := itTerminalNoCase;
      3: t := itTokenRule;
      4: t := itParserRule;
      9: begin // extract sub-rule: '(' ... ')'
           t := itParserRule;
           R := ExtractRule;
           // Must now be positioned at the closing ')'
           if not ValidCur or (R = nil) or (Res.Tags[Cur].TokenType <> 10) then Exit;
         end;
      else Exit;
    end;
    s := Res.TagStr[cur];
    if t <> itParserRule then
    begin
      // Strip the surrounding quotes / angle brackets; the token must be
      // more than just the two delimiters.
      if Length(s) <= 2 then Exit;
      Delete(s, Length(s), 1);
      Delete(s, 1, 1);
    end;
    Result := TParserRuleItem.Create;
    Result.FItemType := t;
    Result.FTerminal := s;
    Result.FParserRule := R;
    Result.FOwnRule := R <> nil; // inline sub-rules are owned by the item
    Inc(Cur);
    ReadRepeater(Result);
  end;

  // Parses one branch (a sequence of items). Returns nil when the branch
  // cannot be completed; otherwise leaves Cur at the branch terminator
  // ('|', ';' or ')').
  function ExtractBranch: TParserRuleBranch;
  var
    Item, last: TParserRuleItem;
    apos, sv_pos: integer;
  begin
    Result := TParserRuleBranch.Create;
    last := nil;
    sv_pos := 0; // to avoid warning
    while ValidCur do
    begin
      apos := Cur;
      Item := ExtractItem;
      if Item <> nil then
      begin
        sv_pos := apos; // remember where the last accepted item started
        Result.FItems.Add(Item);
        Item.FBranch := Result;
        last := item
      end
      else if ValidCur then
        // '|' (6), ';' (7) or ')' (10) legitimately ends the branch.
        if (Res.Tags[Cur].TokenType = 7) or
           (Res.Tags[Cur].TokenType = 6) or
           (Res.Tags[Cur].TokenType = 10) then
          Exit
        else
        begin
          // '=' (5) right after a rule-name item means that item was
          // actually the NAME of the next rule, not a reference: back off
          // to its start and drop it from this branch.
          if (Res.Tags[Cur].TokenType = 5) and (last <> nil) and
             (last.ItemType = itParserRule) then
          begin
            Cur := sv_pos;
            Result.FItems.Delete(Result.FItems.Count - 1);
            Exit;
          end;
          Break;
        end;
    end;
    // Input ended (or an unexpected token appeared) before a terminator:
    // discard the partial branch.
    Result.Free;
    Result := nil;
  end;

  // Parses one rule: either 'name = branch | branch ... ;' or, for an
  // inline sub-rule, the body after '(' up to the matching ')'.
  function ExtractRule: TParserRule;
  var
    Branch: TParserRuleBranch;
    s: string;
  begin
    Result := nil;
    SkipComments;
    S := '';
    if not ValidCur then Exit;
    case Res.Tags[Cur].TokenType of
      4: begin // named rule: identifier '=' ...
           s := Res.TagStr[Cur];
           Inc(Cur);
           SkipComments;
           if not ValidCur or (Res.Tags[Cur].TokenType <> 5) then Exit;
           Inc(Cur);
         end;
      9: Inc(Cur); // anonymous sub-rule: consume '(' (name stays empty)
      else Exit;
    end;
    Result := TParserRule.Create;
    Result.FName := s;
    while ValidCur do
    begin
      Branch := ExtractBranch;
      if Branch = nil then Break;
      Result.FBranches.Add(Branch);
      Branch.FRule := Result;
      if Res.Tags[Cur].TokenType = 6 then
        Inc(Cur) // '|' - another branch follows
      else if Res.Tags[Cur].TokenType = 7 then
      begin
        Inc(Cur); // ';' - rule complete
        Exit;
      end
      else if Res.Tags[Cur].TokenType = 10 then
        Exit // ')' - sub-rule complete; caller consumes it
      else if Result.Count > 0 then
        Exit
      else
        break;
    end;
    // No complete branch parsed: discard.
    Result.Free;
    Result := nil;
  end;

  // Resolves symbolic references in-place: <token> names become indices
  // into TokenNames; rule-name items become pointers to the named rule.
  // Inline sub-rules are linked recursively.
  procedure LinkRuleItems(Rule: TParserRule);
  var
    j, k: integer;
    Item: TParserRuleItem;
  begin
    for j := 0 to Rule.Count - 1 do
      for k := 0 to Rule.Branches[j].Count - 1 do
      begin
        Item := Rule.Branches[j].Items[k];
        case Item.ItemType of
          itTokenRule:
            Item.FTokenType := TokenNames.IndexOf(Item.Terminal);
          itParserRule:
            if Item.IsSubRule then
              LinkRuleItems(Item.FParserRule)
            else
              Item.FParserRule := ParserRuleByName(Item.Terminal);
        end;
      end;
  end;

begin
  Result := True;
  FGrammaRules.Clear;
  FRoot := nil;
  FSkipRule := nil;
  if GetGrammaLines.Count > 0 then
  begin
    // Temporary lexer used only to tokenize the grammar text itself.
    Lex := TecSyntAnalyzer.Create(nil);
    try
      Res := Lex.AddClient(nil, GetGrammaLines);
      if Res <> nil then
      try
        // Prepare Lexer: token types used by the extractors above.
        AddTokenRule(0, '//.*');               // comment
        AddTokenRule(0, '(?s)/\*.*?(\*/|\Z)'); // comment
        AddTokenRule(1, '".*?"');              // Terminal
        AddTokenRule(2, #39'.*?'#39);          // Terminal No case
        AddTokenRule(3, '<.*?>');              // Terminal -> Token rule
        AddTokenRule(4, '\w+');                // Rule
        AddTokenRule(5, '=');                  // Equal
        AddTokenRule(6, '\|');                 // Or
        AddTokenRule(7, ';');                  // Rule stop
        AddTokenRule(8, '[\*\+\?]');           // Repeaters
        AddTokenRule(9, '[\(]');               // Open sub-rule
        AddTokenRule(10,'[\)]');               // Close sub-rule
        // Extract all tokens
        Res.Analyze;
        // extract rules, one after another, until input ends or a rule fails
        Cur := 0;
        while ValidCur do
        begin
          Rule := ExtractRule;
          if Rule <> nil then
          begin
            Rule.FIndex := FGrammaRules.Count;
            FGrammaRules.Add(Rule);
          end
          else
            Break;
        end;
        // performs linking of symbolic references
        for i := 0 to FGrammaRules.Count - 1 do
          LinkRuleItems(TParserRule(FGrammaRules[i]));
      finally
        Res.Free;
      end;
    finally
      Lex.Free;
    end;
    // Well-known rules: 'Root' is the default entry, 'Skip' is matched
    // between items during parsing (see ParseRule.SipComments).
    FRoot := ParserRuleByName('Root');
    FSkipRule := ParserRuleByName('Skip');
  end;
end;

// Convenience wrapper around ParseRule: returns the index of the first
// (leftmost) token of the match, or -1 when the rule does not match.
function TGrammaAnalyzer.TestRule(FromIndex: integer; Rule: TParserRule; Tags: TTokenHolder): integer;
var
  FRootProgNode: TParserNode; // Results of Gramma analysis
begin
  FRootProgNode := ParseRule(FromIndex, Rule, Tags);
  if Assigned(FRootProgNode) then
  begin
    Result := FRootProgNode.FirstToken;
    FRootProgNode.Free;
  end
  else
    Result := -1;
end;

// Matches Rule against the token stream BACKWARD, starting at token
// FromIndex and moving toward index 0. Returns the parse tree on success
// (caller frees it) or nil on failure.
function TGrammaAnalyzer.ParseRule(FromIndex: integer; Rule: TParserRule; Tags: TTokenHolder): TParserNode;
var
  CurIdx: integer;      // index of the next token to match (decreases)
  CallStack: TList;     // recursion guard: (CurIdx, rule index) pairs
  In_SkipRule: Boolean; // True while matching FSkipRule, to avoid re-entry

  function RuleProcess(Rule: TParserRule): TParserNode; forward;

  // Tries the 'Skip' rule after each matched item and discards its result.
  // NOTE(review): name is a typo for "SkipComments"; kept as-is here.
  procedure SipComments;
  var
    skipped: TParserNode;
  begin
    if not In_SkipRule and (FSkipRule <> nil) then
    try
      In_SkipRule := True;
      skipped := RuleProcess(FSkipRule);
      if skipped <> nil then
        skipped.Free;
    finally
      In_SkipRule := False;
    end;
  end;

  // Tries to match one item at CurIdx. On success returns a node and moves
  // CurIdx one token to the left (then lets the skip rule run).
  function ItemProcess(Item: TParserRuleItem): TParserNode;
  begin
    Result := nil;
    if CurIdx >= 0 then
      if Item.FItemType = itParserRule then
        Result := RuleProcess(Item.FParserRule)
      else
      begin
        case Item.FItemType of
          itTerminal:
            if Tags.GetTokenStr(CurIdx) = Item.FTerminal then
              Result := TParserTermNode.Create(Item, CurIdx);
          itTerminalNoCase:
            if SameText(Tags.GetTokenStr(CurIdx), Item.FTerminal) then
              Result := TParserTermNode.Create(Item, CurIdx);
          itTokenRule:
            // Token type 0 acts as a wildcard: it matches any token.
            if (Item.FTokenType = 0) or (Tags.GetTokenType(CurIdx) = Item.FTokenType) then
              Result := TParserTermNode.Create(Item, CurIdx);
        end;
        if Result <> nil then
        begin
          Dec(CurIdx);
          SipComments;
        end;
      end;
  end;

  // Matches one branch: items are processed from LAST to FIRST (tokens are
  // consumed right-to-left), honoring each item's RepMin/RepMax. On failure
  // CurIdx is restored and nil is returned.
  function BranchProcess(Branch: TParserRuleBranch): TParserNode;
  var
    i, sv_idx, rep_cnt: integer;
    node: TParserNode;
  begin
    if Branch.Count = 0 then
    begin
      Result := nil;
      Exit;
    end;
    sv_idx := CurIdx; // for backtracking on failure
    Result := TParserBranchNode.Create(Branch);
    for i := Branch.Count - 1 downto 0 do
    begin
      rep_cnt := 0;
      node := nil;
      // Repeat the item while allowed by RepMax (-1 = unbounded); the
      // (rep_cnt = 0) term guarantees at least one attempt.
      while ((rep_cnt = 0) or (node <> nil)) and
            ((Branch.Items[i].FRepMax = -1) or (rep_cnt < Branch.Items[i].FRepMax)) do
      begin
        node := ItemProcess(Branch.Items[i]);
        if node <> nil then
          TParserBranchNode(Result).Add(node)
        else if rep_cnt < Branch.Items[i].FRepMin then
        begin
          // Below the minimum repeat count: the whole branch fails.
          CurIdx := sv_idx;
          Result.Free;
          Result := nil;
          Exit;
        end
        else
          Break; // optional repetitions exhausted; move to previous item
        Inc(rep_cnt);
      end;
    end;
  end;

  // Tries the rule's branches (last to first); returns the first match.
  // Guards against left-recursion by refusing to re-enter the same rule
  // at the same token position.
  function RuleProcess(Rule: TParserRule): TParserNode;
  var
    i, handle: integer;
  begin
    // Check stack: encode (position, rule) as a single integer handle.
    handle := CurIdx shl 16 + Rule.FIndex;
    if CallStack.IndexOf(TObject(handle)) <> -1 then
      raise Exception.Create('Circular stack');
    CallStack.Add(TObject(handle));
    try
      Result := nil;
      for i := Rule.Count - 1 downto 0 do
      begin
//        if Assigned(TraceParserProc) and not In_SkipRule then
//          TraceParserProc(Self, CurIdx, ' ' + Rule.FName);
        Result := BranchProcess(Rule.Branches[i]);
        if Result <> nil then Exit;
      end;
    finally
      CallStack.Delete(CallStack.Count - 1);
    end;
  end;

begin
  CallStack := TList.Create;
  try
    CurIdx := FromIndex;
    Result := RuleProcess(Rule);
  finally
    CallStack.Free;
  end;
end;

// Fires the OnChange event (called after the grammar text changes).
procedure TGrammaAnalyzer.Changed;
begin
  if Assigned(FOnChange) then
    FOnChange(Self);
end;

initialization
  TraceParserProc := nil;

end.