lasarus_compotents/EControl/ec/ecsyntgramma.pas

813 lines
20 KiB
ObjectPascal

{ *************************************************************************** }
{ }
{ EControl Syntax Editor SDK }
{ }
{ Copyright (c) 2004 - 2015 EControl Ltd., Zaharov Michael }
{ www.econtrol.ru }
{ support@econtrol.ru }
{ }
{ *************************************************************************** }
{$mode delphi}
unit ecSyntGramma;
interface
uses
Classes,
Dialogs,
ecStrUtils,
ATStringProc_TextBuffer;
type
TParserRuleItem = class;
TParserRule = class;
TParserRuleBranch = class;
TParserItemType = (itTerminal, // "aaa"
itTerminalNoCase, // 'aaa'
itTokenRule, // <aaa>
itParserRule); // aaa
TTokenHolder = class
protected
function GetTokenCount: integer; virtual; abstract;
function GetTokenType(Index: integer): integer; virtual; abstract;
function GetTokenStr(Index: integer): ecString; virtual; abstract;
end;
TParserNode = class
protected
function GetCount: integer; virtual; abstract;
function GetNodes(Index: integer): TParserNode; virtual; abstract;
function GetFirstToken: integer; virtual; abstract;
function GetLastToken: integer; virtual; abstract;
public
property Count: integer read GetCount;
property Nodes[Index: integer]: TParserNode read GetNodes; default;
property FirstToken: integer read GetFirstToken;
property LastToken: integer read GetLastToken;
end;
TParserBranchNode = class(TParserNode)
private
FNodes: TList;
FRule: TParserRuleBranch;
protected
function GetCount: integer; override;
function GetNodes(Index: integer): TParserNode; override;
function GetFirstToken: integer; override;
function GetLastToken: integer; override;
public
constructor Create(ARule: TParserRuleBranch);
destructor Destroy; override;
procedure Add(Node: TParserNode);
property Rule: TParserRuleBranch read FRule;
end;
TParserTermNode = class(TParserNode)
private
FRule: TParserRuleItem;
FTokenIndex: integer;
protected
function GetCount: integer; override;
function GetNodes(Index: integer): TParserNode; override;
function GetFirstToken: integer; override;
function GetLastToken: integer; override;
public
constructor Create(ARule: TParserRuleItem; ATokenIndex: integer);
property Rule: TParserRuleItem read FRule;
end;
TParserRuleItem = class
private
FItemType: TParserItemType;
FTerminal: string;
FTokenType: integer;
FParserRule: TParserRule;
FBranch: TParserRuleBranch;
FRepMin: integer;
FRepMax: integer;
FOwnRule: Boolean;
public
constructor Create;
destructor Destroy; override;
function IsValid: Boolean;
property ItemType: TParserItemType read FItemType;
property Terminal: string read FTerminal;
property TokenType: integer read FTokenType;
property ParserRule: TParserRule read FParserRule;
property Branch: TParserRuleBranch read FBranch;
property RepMin: integer read FRepMin;
property RepMax: integer read FRepMax;
property IsSubRule: Boolean read FOwnRule;
end;
TParserRuleBranch = class
private
FItems: TList;
FRule: TParserRule;
function GetCount: integer;
function GetItems(Index: integer): TParserRuleItem;
public
constructor Create;
destructor Destroy; override;
function IsValid: Boolean;
property Count: integer read GetCount;
property Items[Index: integer]: TParserRuleItem read GetItems;
property Rule: TParserRule read FRule;
end;
TParserRule = class
private
FBranches: TList;
FName: string;
FIndex: integer;
function GetCount: integer;
function GetBranches(Index: integer): TParserRuleBranch;
public
constructor Create;
destructor Destroy; override;
function IsValid: Boolean;
property Count: integer read GetCount;
property Branches[Index: integer]: TParserRuleBranch read GetBranches;
property Name: string read FName;
end;
TGrammaAnalyzer = class(TPersistent)
private
FGrammaRules: TList;
FGrammaDefs: TATStringBuffer;
FRoot: TParserRule;
FSkipRule: TParserRule;
FOnChange: TNotifyEvent;
function GetGrammaCount: integer;
function GetGrammaRules(Index: integer): TParserRule;
function GetGramma: ecString;
procedure SetGramma(const Value: ecString);
protected
function GetGrammaLines: TATStringBuffer;
procedure Changed; dynamic;
public
constructor Create;
destructor Destroy; override;
procedure Assign(Source: TPersistent); override;
procedure Clear;
function ParserRuleByName(const AName: string): TParserRule;
function IndexOfRule(Rule: TParserRule): integer;
function CompileGramma(TokenNames: TStrings): Boolean;
function ParseRule(FromIndex: integer; Rule: TParserRule; Tags: TTokenHolder): TParserNode;
function TestRule(FromIndex: integer; Rule: TParserRule; Tags: TTokenHolder): integer;
property GrammaCount: integer read GetGrammaCount;
property GrammaRules[Index : integer]: TParserRule read GetGrammaRules; default;
property OnChange: TNotifyEvent read FOnChange write FOnChange;
published
property Gramma: ecString read GetGramma write SetGramma;
end;
TTraceParserProc = procedure(Sender: TObject; CurIdx: integer; const TraceStr: string) of object;
var
TraceParserProc: TTraceParserProc;
implementation
uses SysUtils, Contnrs, ecSyntAnal;
{ TParserRuleItem }
constructor TParserRuleItem.Create;
begin
inherited Create;
FTokenType := -1;
FRepMin := 1;
FRepMax := 1;
FOwnRule := False;
end;
destructor TParserRuleItem.Destroy;
begin
if FOwnRule then
FParserRule.Free;
inherited;
end;
function TParserRuleItem.IsValid: Boolean;
begin
case FItemType of
itTerminal,
itTerminalNoCase: Result := FTerminal <> '';
itTokenRule: Result := FTokenType <> -1;
itParserRule: Result := FParserRule <> nil;
else Result := False;
end;
end;
{ TParserRuleBranch }
constructor TParserRuleBranch.Create;
begin
inherited Create;
FItems := TObjectList.Create;
end;
destructor TParserRuleBranch.Destroy;
begin
FItems.Free;
inherited;
end;
function TParserRuleBranch.GetCount: integer;
begin
Result := FItems.Count;
end;
function TParserRuleBranch.GetItems(Index: integer): TParserRuleItem;
begin
Result := TParserRuleItem(FItems[Index]);
end;
function TParserRuleBranch.IsValid: Boolean;
var i: integer;
begin
for i := 0 to FItems.Count - 1 do
if not Items[i].IsValid then
begin
Result := False;
Exit;
end;
Result := True;
end;
{ TParserRule }
constructor TParserRule.Create;
begin
inherited Create;
FBranches := TObjectList.Create;
end;
destructor TParserRule.Destroy;
begin
FBranches.Free;
inherited;
end;
function TParserRule.GetBranches(Index: integer): TParserRuleBranch;
begin
Result := TParserRuleBranch(FBranches[Index]);
end;
function TParserRule.GetCount: integer;
begin
Result := FBranches.Count;
end;
function TParserRule.IsValid: Boolean;
var i: integer;
begin
for i := 0 to Count - 1 do
if not Branches[i].IsValid then
begin
Result := False;
Exit;
end;
Result := (Count > 1) or (Count = 1) and (Branches[0].Count > 0);
end;
{ TParserNode }
procedure TParserBranchNode.Add(Node: TParserNode);
begin
if FNodes = nil then FNodes := TObjectList.Create;
FNodes.Add(Node);
end;
constructor TParserBranchNode.Create(ARule: TParserRuleBranch);
begin
inherited Create;
FRule := ARule;
end;
destructor TParserBranchNode.Destroy;
begin
if FNodes <> nil then FNodes.Free;
inherited;
end;
function TParserBranchNode.GetCount: integer;
begin
if FNodes = nil then Result := 0
else Result := FNodes.Count;
end;
function TParserBranchNode.GetFirstToken: integer;
begin
if Count > 0 then
Result := Nodes[Count - 1].FirstToken
else
Result := -1;
end;
function TParserBranchNode.GetLastToken: integer;
begin
if Count > 0 then
Result := Nodes[0].LastToken
else
Result := -1;
end;
function TParserBranchNode.GetNodes(Index: integer): TParserNode;
begin
if FNodes <> nil then
Result := TParserNode(FNodes[Index])
else
Result := nil;
end;
{ TParserTermNode }
constructor TParserTermNode.Create(ARule: TParserRuleItem;
ATokenIndex: integer);
begin
inherited Create;
FRule := ARule;
FTokenIndex := ATokenIndex;
end;
function TParserTermNode.GetCount: integer;
begin
Result := 0;
end;
function TParserTermNode.GetFirstToken: integer;
begin
Result := FTokenIndex;
end;
function TParserTermNode.GetLastToken: integer;
begin
Result := FTokenIndex;
end;
function TParserTermNode.GetNodes(Index: integer): TParserNode;
begin
Result := nil;
end;
{ TGrammaAnalyzer }
constructor TGrammaAnalyzer.Create;
begin
inherited;
FGrammaRules := TObjectList.Create;
FGrammaDefs := TATStringBuffer.Create;
end;
destructor TGrammaAnalyzer.Destroy;
begin
Clear;
FGrammaDefs.Free;
FGrammaRules.Free;
inherited;
end;
procedure TGrammaAnalyzer.Clear;
begin
FGrammaRules.Clear;
FGrammaDefs.Clear;
FRoot := nil;
FSkipRule := nil;
end;
procedure TGrammaAnalyzer.Assign(Source: TPersistent);
begin
if Source is TGrammaAnalyzer then
Gramma := (Source as TGrammaAnalyzer).Gramma;
end;
function TGrammaAnalyzer.ParserRuleByName(const AName: string): TParserRule;
var i : integer;
begin
if AName <> '' then
for i := 0 to FGrammaRules.Count - 1 do
if SameText(AName, GrammaRules[i].Name) then
begin
Result := GrammaRules[i];
Exit;
end;
Result := nil;
end;
function TGrammaAnalyzer.GetGrammaCount: integer;
begin
Result := FGrammaRules.Count;
end;
function TGrammaAnalyzer.GetGrammaRules(Index: integer): TParserRule;
begin
Result := TParserRule(FGrammaRules[Index]);
end;
function TGrammaAnalyzer.IndexOfRule(Rule: TParserRule): integer;
begin
Result := FGrammaRules.IndexOf(Rule);
end;
function TGrammaAnalyzer.GetGramma: ecString;
begin
Result := FGrammaDefs.FText;
end;
procedure TGrammaAnalyzer.SetGramma(const Value: ecString);
begin
FGrammaDefs.SetupSlow(Value);
Changed;
end;
function TGrammaAnalyzer.GetGrammaLines: TATStringBuffer;
begin
Result := FGrammaDefs;
end;
// Compiling Gramma rules
function TGrammaAnalyzer.CompileGramma(TokenNames: TStrings): Boolean;
var Lex: TecSyntAnalyzer;
Res: TecClientSyntAnalyzer;
Cur, i: integer;
Rule: TParserRule;
procedure AddTokenRule(ATokenType: integer; Expr: string);
var TokenRule: TecTokenRule;
begin
TokenRule := Lex.TokenRules.Add;
TokenRule.TokenType := ATokenType;
TokenRule.Expression := Expr;
end;
function ValidCur: Boolean;
begin
Result := Cur < Res.TagCount;
end;
procedure SkipComments;
begin
while ValidCur do
if Res.Tags[Cur].TokenType = 0 then Inc(Cur)
else Exit;
end;
procedure ReadRepeater(RuleItem: TParserRuleItem);
var s: string;
begin
if not ValidCur then Exit;
if Res.Tags[Cur].TokenType = 8 then
begin
s := Res.TagStr[Cur];
if s = '+' then RuleItem.FRepMax := -1 else
if s = '?' then RuleItem.FRepMin := 0 else
if s = '*' then
begin
RuleItem.FRepMin := 0;
RuleItem.FRepMax := -1;
end;
Inc(Cur);
end;
end;
function ExtractRule: TParserRule; forward;
function ExtractItem: TParserRuleItem;
var t: TParserItemType;
s: string;
R: TParserRule;
begin
Result := nil;
SkipComments;
if not ValidCur then Exit;
R := nil;
case Res.Tags[Cur].TokenType of
1: t := itTerminal;
2: t := itTerminalNoCase;
3: t := itTokenRule;
4: t := itParserRule;
9: begin // extract sub-rule
t := itParserRule;
R := ExtractRule;
if not ValidCur or (R = nil) or
(Res.Tags[Cur].TokenType <> 10) then Exit;
end;
else Exit;
end;
s := Res.TagStr[cur];
if t <> itParserRule then
begin
if Length(s) <= 2 then Exit;
Delete(s, Length(s), 1);
Delete(s, 1, 1);
end;
Result := TParserRuleItem.Create;
Result.FItemType := t;
Result.FTerminal := s;
Result.FParserRule := R;
Result.FOwnRule := R <> nil;
Inc(Cur);
ReadRepeater(Result);
end;
function ExtractBranch: TParserRuleBranch;
var Item, last: TParserRuleItem;
apos, sv_pos: integer;
begin
Result := TParserRuleBranch.Create;
last := nil;
sv_pos := 0; // to avoid warning
while ValidCur do
begin
apos := Cur;
Item := ExtractItem;
if Item <> nil then
begin
sv_pos := apos;
Result.FItems.Add(Item);
Item.FBranch := Result;
last := item
end else
if ValidCur then
if (Res.Tags[Cur].TokenType = 7) or
(Res.Tags[Cur].TokenType = 6) or
(Res.Tags[Cur].TokenType = 10) then Exit
else
begin
if (Res.Tags[Cur].TokenType = 5) and (last <> nil) and
(last.ItemType = itParserRule) then
begin
Cur := sv_pos;
Result.FItems.Delete(Result.FItems.Count - 1);
Exit;
end;
Break;
end;
end;
Result.Free;
Result := nil;
end;
function ExtractRule: TParserRule;
var Branch: TParserRuleBranch;
s: string;
begin
Result := nil;
SkipComments;
S := '';
if not ValidCur then Exit;
case Res.Tags[Cur].TokenType of
4: begin
s := Res.TagStr[Cur];
Inc(Cur);
SkipComments;
if not ValidCur or (Res.Tags[Cur].TokenType <> 5) then Exit;
Inc(Cur);
end;
9: Inc(Cur);
else Exit;
end;
Result := TParserRule.Create;
Result.FName := s;
while ValidCur do
begin
Branch := ExtractBranch;
if Branch = nil then Break;
Result.FBranches.Add(Branch);
Branch.FRule := Result;
if Res.Tags[Cur].TokenType = 6 then Inc(Cur) else
if Res.Tags[Cur].TokenType = 7 then
begin
Inc(Cur);
Exit;
end else
if Res.Tags[Cur].TokenType = 10 then Exit else
if Result.Count > 0 then Exit else break;
end;
Result.Free;
Result := nil;
end;
procedure LinkRuleItems(Rule: TParserRule);
var j, k: integer;
Item: TParserRuleItem;
begin
for j := 0 to Rule.Count - 1 do
for k := 0 to Rule.Branches[j].Count - 1 do
begin
Item := Rule.Branches[j].Items[k];
case Item.ItemType of
itTokenRule: Item.FTokenType := TokenNames.IndexOf(Item.Terminal);
itParserRule: if Item.IsSubRule then
LinkRuleItems(Item.FParserRule)
else
Item.FParserRule := ParserRuleByName(Item.Terminal);
end;
end;
end;
begin
Result := True;
FGrammaRules.Clear;
FRoot := nil;
FSkipRule := nil;
if GetGrammaLines.Count > 0 then
begin
Lex := TecSyntAnalyzer.Create(nil);
try
Res := Lex.AddClient(nil, GetGrammaLines);
if Res <> nil then
try
// Prepare Lexer
AddTokenRule(0, '//.*'); // comment
AddTokenRule(0, '(?s)/\*.*?(\*/|\Z)'); // comment
AddTokenRule(1, '".*?"'); // Terminal
AddTokenRule(2, #39'.*?'#39); // Terminal No case
AddTokenRule(3, '<.*?>'); // Terminal -> Token rule
AddTokenRule(4, '\w+'); // Rule
AddTokenRule(5, '='); // Equal
AddTokenRule(6, '\|'); // Or
AddTokenRule(7, ';'); // Rule stop
AddTokenRule(8, '[\*\+\?]'); // Repeaters
AddTokenRule(9, '[\(]'); // Open sub-rule
AddTokenRule(10,'[\)]'); // Close sub-rule
// Extract all tokens
Res.Analyze;
// extract rules
Cur := 0;
while ValidCur do
begin
Rule := ExtractRule;
if Rule <> nil then
begin
Rule.FIndex := FGrammaRules.Count;
FGrammaRules.Add(Rule);
end
else Break;
end;
// performs linking
for i := 0 to FGrammaRules.Count - 1 do
LinkRuleItems(TParserRule(FGrammaRules[i]));
finally
Res.Free;
end;
finally
Lex.Free;
end;
FRoot := ParserRuleByName('Root');
FSkipRule := ParserRuleByName('Skip');
end;
end;
function TGrammaAnalyzer.TestRule(FromIndex: integer; Rule: TParserRule; Tags: TTokenHolder): integer;
var FRootProgNode: TParserNode; // Results of Gramma analisys
begin
FRootProgNode := ParseRule(FromIndex, Rule, Tags);
if Assigned(FRootProgNode) then
begin
Result := FRootProgNode.FirstToken;
FRootProgNode.Free;
end else
Result := -1;
end;
function TGrammaAnalyzer.ParseRule(FromIndex: integer; Rule: TParserRule; Tags: TTokenHolder): TParserNode;
var CurIdx: integer;
CallStack: TList;
In_SkipRule: Boolean;
function RuleProcess(Rule: TParserRule): TParserNode; forward;
procedure SipComments;
var skipped: TParserNode;
begin
if not In_SkipRule and (FSkipRule <> nil) then
try
In_SkipRule := True;
skipped := RuleProcess(FSkipRule);
if skipped <> nil then skipped.Free;
finally
In_SkipRule := False;
end;
end;
function ItemProcess(Item: TParserRuleItem): TParserNode;
begin
Result := nil;
if CurIdx >= 0 then
if Item.FItemType = itParserRule then
Result := RuleProcess(Item.FParserRule)
else
begin
case Item.FItemType of
itTerminal:
if Tags.GetTokenStr(CurIdx) = Item.FTerminal then
Result := TParserTermNode.Create(Item, CurIdx);
itTerminalNoCase:
if SameText(Tags.GetTokenStr(CurIdx), Item.FTerminal) then
Result := TParserTermNode.Create(Item, CurIdx);
itTokenRule:
if (Item.FTokenType = 0) or (Tags.GetTokenType(CurIdx) = Item.FTokenType) then
Result := TParserTermNode.Create(Item, CurIdx);
end;
if Result <> nil then
begin
Dec(CurIdx);
SipComments;
end;
end;
end;
function BranchProcess(Branch: TParserRuleBranch): TParserNode;
var i, sv_idx, rep_cnt: integer;
node: TParserNode;
begin
if Branch.Count = 0 then
begin
Result := nil;
Exit;
end;
sv_idx := CurIdx;
Result := TParserBranchNode.Create(Branch);
for i := Branch.Count - 1 downto 0 do
begin
rep_cnt := 0;
node := nil;
while ((rep_cnt = 0) or (node <> nil)) and
((Branch.Items[i].FRepMax = -1) or (rep_cnt < Branch.Items[i].FRepMax)) do
begin
node := ItemProcess(Branch.Items[i]);
if node <> nil then TParserBranchNode(Result).Add(node) else
if rep_cnt < Branch.Items[i].FRepMin then
begin
CurIdx := sv_idx;
Result.Free;
Result := nil;
Exit;
end else Break;
Inc(rep_cnt);
end;
end;
end;
function RuleProcess(Rule: TParserRule): TParserNode;
var i, handle: integer;
begin
// Check stack
handle := CurIdx shl 16 + Rule.FIndex;
if CallStack.IndexOf(TObject(handle)) <> -1 then
raise Exception.Create('Circular stack');
CallStack.Add(TObject(handle));
try
Result := nil;
for i := Rule.Count - 1 downto 0 do
begin
// if Assigned(TraceParserProc) and not In_SkipRule then
// TraceParserProc(Self, CurIdx, ' ' + Rule.FName);
Result := BranchProcess(Rule.Branches[i]);
if Result <> nil then Exit;
end;
finally
CallStack.Delete(CallStack.Count - 1);
end;
end;
begin
CallStack := TList.Create;
try
CurIdx := FromIndex;
Result := RuleProcess(Rule);
finally
CallStack.Free;
end;
end;
procedure TGrammaAnalyzer.Changed;
begin
if Assigned(FOnChange) then FOnChange(Self);
end;
initialization
TraceParserProc := nil;
end.