Mirror

Parse an XML file (Views: 717)

Problem/Question/Abstract:

Another method to parse an XML file

Answer:

Here I will show one way to parse XML document

The main concept of XML is using containers for XML objects - so we will use Tree concept while building our XML object from XML document.

XML text uses containers (...) or simple definitions ()  in each TAG we can use parameters (... ...)

Finally we will have an array of objects, describing XML tags. Every object of this class will have an array of children if needed, and a hash to describe properties of it.


For example if we have a text








we will have one root object (named "UL") in wich we will have 4 children (named "LI" with different sets of properties - from "NAME"="xxx1" to "NAME"="xxx4")

This is not a trivial task - so we will make a unit to solve this... I will try to comment some here...

unit YZXMLParser;

interface

uses
SysUtils, ComCtrls;

type
THashElement = record
Key, Value: string;
end;

type
THashElementArr = array of THashElement;

// here  we declare a THash class to use in our parser
// The concept of THash is to retreive named values from an array
// Hash is an array where index is a string (example V[Key]=value,
// whehe Key and Value are of type string)

// The main purpose of this class is to rerurn a value of a String-named key
//(example: s:=hash['someValue'])

// the description of a hash element we use

type
THash = class(TObject)
private
Arr: THashElementArr;
function GetValue(Key: string): string;
procedure SetValue(Key: string; const VValue: string);
function GetKeys: StrArr;
function GetValues: StrArr;
function GetCount: Integer;
function Getempty: Boolean;
public
property Value[Key: string]: string read GetValue write SetValue; default;
property Values: StrArr read GetValues;
property Keys: StrArr read GetKeys;
property Count: Integer read GetCount;
property Empty: Boolean read Getempty;
procedure Clear;
constructor Create;
destructor Destroy; override;
end;

TYZHash = THash;

type

// Here we declare some definitions for our parser to know what
// identifier we would receive next in our text
// these  values will be used in the result of WhatNext() function which will scan text for keys

TYZXMLMarker = (xmlOpenTag, xmlCloseTagShort, xmlCloseTag, xmlCloseTagLong,
xmlEOF, xmlIdentifier, xmlunknown);
/ *

Because we use recursive definition of our class(as TreeView, where we declare
children of
the same type in opur type
declaration)we must use forward declaration
* /

// The definition of a TAG class

TYZXMLTag = class;
TYZXMLTags = array of TYZXMLTag;

TYZXMLTag = class(TObject)
private
FData: TYZHash;
FParent: TYZXMLTag;
FName: string;

function GetValue(AName: string): string;
procedure SetName(const Value: string);
procedure SetValue(AName: string; const Value: string);
function GetCount: Integer;
function GetValueNames: strarr;

public
Children: TYZXMLTags; // these are our child nodes
Text: string;

property Name: string read FName write SetName; // name of a tag
property Values[AName: string]: string read GetValue write SetValue;
default; // values of properties of a tag (hash values)
property ValueNames: strarr read GetValueNames;
// array of strings returniong names of all props of this tag
property Count: Integer read GetCount;
// a count of children of a tag (if this tag is a container)

function SkipSpaces(var AData: string; var APos: Integer;
RememberBreaks: Boolean = False): Char;
// internal. for skip spaces (also CR or LF or other non-text chars) while parsing text

function ParseValue(var AData: string; var APos: Integer): Boolean;
// parse value (calling when found a parameter of a tag)
function ParseName(var AData: string; var APos: Integer): Boolean;
// parse key of parameter in a tag

// these two procs used to parse any text found while parsing XML
function ParseString(var AData: string; var APos: Integer;
RememberBreaks: Boolean = False): string;
function ParseQuotedString(var AData: string; var APos: Integer;
QIndef: Char = '"'): string;

// returnes the type of next identifier in XML
function WhatNext(var AData: string; var APos: Integer;
var ANext: Integer; RememberBreaks: Boolean = False): TYZXMLMarker;

// This is a main procedure of our class - AData is a string,
// containing all XML data (you can use TMemo.Text, for example, as a parameter of AData)
function ParseXML(var AData: string; var APos: Integer): Boolean;

// This function returnes a text string, built based on data, stored in an object.
function GenerateXML(var AData: string; ATab: string = ''): Boolean;

// returnes char from string at specified pos (#0 if not in range)
function CharAt(var S: string; APos: Integer): Char;

function TagNameExists(AName: string): Boolean;

// Adds a child to children array of a current tag

function AddChild: TYZXMLTag;

// Initializes current tag and deletes all existing children
procedure Clear; virtual;

constructor Create(AParent: TYZXMLTag); virtual;
destructor Destroy; virtual;
end;

type
TYZXMLParser = class(TYZXMLTag)
private
Header: TYZHash;
procedure _BuildTreeView(ATreeView: TTreeView; ANode: TTreeNode; ATag: TYZXMLTag);
public
property HeaderValues: TYZHash read Header;

procedure BuildTreeView(ATreeView: TTreeView);
function Parse(AData: string): Boolean;
function Generate(var AData: string): Boolean;
constructor Create;
destructor Destroy;
end;

implementation

//==============================================================================

{ TYZXMLTag }

function TYZXMLTag.AddChild: TYZXMLTag;
begin
setlength(children, Length(children) + 1);
Result := TYZXMLTag.Create(Self);
children[Length(children) - 1] := Result;
end;

//------------------------------------------------------------------------------

procedure TYZXMLTag.Clear;
var
i: Integer;
begin
for i := 0 to Count - 1 do
if children[i] <> nil then
Children[i].Destroy;
setlength(children, 0);
FData.Clear;
Text := '';
end;

//------------------------------------------------------------------------------

constructor TYZXMLTag.Create(AParent: TYZXMLTag);
begin
inherited Create;
FData := TYZHash.Create;
FParent := AParent;
Clear;
end;

//------------------------------------------------------------------------------

destructor TYZXMLTag.Destroy;
begin
Clear;
FData.Destroy;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.GetCount: Integer;
begin
Result := Length(children);
end;

//------------------------------------------------------------------------------

function TYZXMLTag.GetValue(AName: string): string;
begin
Result := FData[AName];
end;

//------------------------------------------------------------------------------

function TYZXMLTag.ParseName(var AData: string; var APos: Integer): Boolean;
begin
Result := False;
FName := ParseString(AData, APos);
if fname = '' then
Exit;
Result := True;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.ParseQuotedString(var AData: string; var APos: Integer;
QIndef: Char = '"'): string;
var
i: Integer;
skipnext: Boolean;
z: Char;
begin
Result := '';
if CharAt(AData, APos) <> QIndef then
Exit;
i := apos;
skipnext := True;
repeat
if not skipnext then
begin
if charat(AData, I) = '\' then
SkipNext := True
else
begin
z := charat(AData, I);
if (Z = QIndef) or (z = #0) then
begin
Result := Copy(AData, aPos + 1, I - APos - 1);
//          result:=exch(result,'\','');
APos := I + 1;
Exit;
end;
end;
end
else
skipnext := False;
Inc(i);
until False;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.ParseString(var AData: string; var APos: Integer;
RememberBreaks: Boolean = False): string;
const
extsym: string = '=<>;?*/';
var
nxt: Char;
x1, x2, i: Integer;
begin
Result := '';
nxt := SkipSpaces(AData, APos, RememberBreaks);
if nxt = #0 then
Exit;
if (nxt = '"') or (nxt = '''') then
begin
Result := ParseQuotedString(AData, APos);
Exit;
end;
x1 := APos;
i := x1;
nxt := CharAt(AData, i);
while ((Ord(nxt) <= 32) or (Pos(nxt, extsym) > 0)) and (nxt <> #0) do
begin
Inc(i);
nxt := CharAt(AData, i);
end;
APos := i;
X1 := APos;
while (Ord(nxt) > 32) and (Pos(nxt, extsym) <= 0) do
begin
Inc(i);
nxt := CharAt(AData, i);
end;
x2 := i - x1;
Result := Copy(AData, x1, x2);
APos := i;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.ParseValue(var AData: string; var APos: Integer): Boolean;
var
n, v: string;
i, x: Integer;
begin
Result := False;
n := parseString(AData, APos);
if n = '' then
Exit;
if skipspaces(AData, APos) <> '=' then
Exit;
Inc(apos);
V := parseString(AData, APos);
fdata[n] := dequote(v);
Result := True;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.ParseXML(var AData: string; var APos: Integer): Boolean;
var
N: TYZXMLMarker;
nxt: Integer;
isLong: Boolean;
inTag: Boolean;
begin
isLong := False;
Result := False;
Clear;
if WhatNext(AData, APos, nxt) <> xmlOpenTag then
Exit;
APos := nxt;
if WhatNext(AData, APos, nxt) <> xmlIdentifier then
Exit;
Result := ParseName(AData, APos);
if not Result then
Exit;
intag := True;
Result := False;
while True do
begin
N := WhatNext(AData, APos, nxt, (not intag and islong and (Count > 0)));
case N of
xmlEOF: Exit;
xmlCloseTagLong:
begin
Result := True;
if islong then
APos := nxt;
if (Text <> '') and (Count > 0) then
begin
Text := exch(Text, #13#10#13#10, #13#10);
end;

Exit;
end;
xmlCloseTagShort:
begin
Result := (not IsLong) and intag;
if Result then
APos := nxt;
Exit;
end;
xmlOpenTag:
begin
if islong then
Result := AddChild.ParseXML(AData, APos)
else
begin
Result := False;
Exit;
end;
if not Result then
Exit;
end;
xmlCloseTag:
begin
IsLong := True;
APos := nxt;
intag := False;
end;
xmlIdentifier:
begin
if intag then
parsevalue(AData, APos)
else
Text := Text + ParseString(AData, APos, True)
end;
xmlUnknown:
begin
Result := True;
Exit;
end;
end;
end;
end;

//------------------------------------------------------------------------------

procedure TYZXMLTag.SetName(const Value: string);
begin
FName := Value;
end;

//------------------------------------------------------------------------------

procedure TYZXMLTag.SetValue(AName: string; const Value: string);
begin
FData[AName] := Value;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.SkipSpaces(var AData: string; var APos: Integer;
RememberBreaks: Boolean = False): Char;
var
L: Integer;
P: Char;
begin
L := Length(AData);
while APos <= L do
begin
P := AData[APos];
if Ord(p) > 32 then
begin
Result := p;
Exit;
end
else if rememberbreaks then
begin
if Pos(p, #13#9' ') > 0 then
Text := Text + ' ';
end;
Inc(APos);
end;
Result := #0;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.CharAt(var S: string; APos: Integer): Char;
begin
Result := #0;
if (Length(s) < APos) or (apos < 1) then
Exit;
Result := s[APos];
end;

//------------------------------------------------------------------------------

function TYZXMLTag.WhatNext(var AData: string; var APos: Integer;
var ANext: Integer; RememberBreaks: Boolean = False): TYZXMLMarker;
var
s: string;
C: Char;
P: Integer;
begin
Result := xmlEOF;
P := APos;
C := SkipSpaces(AData, APos);
P := APos;
ANext := P;
if C = #0 then
Exit;

if C = '<' then
if CharAt(AData, P + 1) = '/' then
begin
Inc(P, 2);
s := parsestring(AData, P);
if (uppercase(s) = uppercase(FName)) and (SkipSpaces(AData, P) = '>') then
begin
ANext := P + 1;
Result := xmlCloseTagLong;
Exit;
end
else
begin
if TagNameExists(s) then
begin
Result := xmlCloseTagLong;
ANext := APos;
Exit;
end;
ANext := P + 1;
Result := xmlCloseTagLong;
Exit;
end;
end;

if C = '<' then
begin
ANext := P + 1;
Result := xmlOpenTag;
Exit;
end;

if C = '>' then
begin
ANext := P + 1;
Result := xmlCloseTag;
Exit;
end;
if C = '/' then
if CharAt(AData, P + 1) = '>' then
begin
ANext := P + 2;
Result := xmlCloseTagShort;
Exit;
end;
ANext := P;
parsestring(AData, ANext);
Result := xmlIdentifier;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.GetValueNames: strarr;
begin
Result := FData.Keys;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.GenerateXML(var AData: string; ATab: string = ''): Boolean;
var
valDelimiter: string;
spc: string;
i: Integer;
a: strarr;
begin
spc := ATab + #9;
if FData.Count < 5 then
valDelimiter := ' '
else
valDelimiter := #13#10 + spc;
AData := AData + #13#10 + ATab + '<' + FName;
a := FData.keys;
for i := 0 to Length(a) - 1 do
begin
AData := AData + valDelimiter + a[i] + ' = "' + EnQuote(values[a[i]]) + '"';
end;
if (Count > 0) or (Text <> '') then
begin
AData := AData + '>' + Text;
for i := 0 to Count - 1 do
begin
Children[i].GenerateXML(AData, ATab + #9);
end;
AData := AData + #13#10 + ATab + '';
end
else
AData := AData + '/>';
Result := True;
end;

//------------------------------------------------------------------------------

function TYZXMLTag.TagNameExists(AName: string): Boolean;
begin
Result := AnsiUpperCase(AName) = AnsiUpperCase(Self.FName);
if Self.FParent = nil then
Exit;
if not Result then
Result := fparent.TagNameExists(AName);
end;

//==============================================================================

{ TYZXMLParser }

constructor TYZXMLParser.Create;
begin
Header := TYZHash.Create;
inherited Create(nil);
end;

//------------------------------------------------------------------------------

destructor TYZXMLParser.Destroy;
begin
inherited;
Header.Destroy;
end;

//------------------------------------------------------------------------------

procedure TYZXMLParser.BuildTreeView(ATreeView: TTreeView);
var
i: Integer;
begin
//  clear;
ATreeView.Items.Clear;
for i := 0 to Count - 1 do
_BuildTreeView(ATreeView, nil, children[i]);
end;

//------------------------------------------------------------------------------

procedure TYZXMLParser._BuildTreeView(ATreeView: TTreeView; ANode: TTreeNode;
ATag: TYZXMLTag);
var
i: Integer;
N: TTreeNode;
begin
N := ATreeView.Items.AddChildObject(ANode, ATag.Name + ' ' + FData['ID'],
Pointer(ATag));
for i := 0 to ATag.Count - 1 do
begin
if ATag.children[i] <> nil then
_BuildTreeView(ATreeView, N, ATag.children[i])
else
ATreeView.Items.AddChild(N, 'nil');
end;
N.Expanded := True;
end;

//------------------------------------------------------------------------------

function TYZXMLParser.Parse(AData: string): Boolean;
var
x1, x2, X, i: Integer;
s: string;
tmp: TYZXMLTag;
a: strarr;
N: TYZXMLMarker;
begin
X := 1;
Self.SkipSpaces(AData, X);
x2 := -1;
Result := False;
Clear;
Header.Clear;
x1 := Pos('if x1 >= X then
begin
x2 := Pos('?>', AData);
if x2 < X then
Exit;
s := uppercase(Copy(AData, x1 + 2, 4));
if Pos('XML ', s) <> 1 then
Exit;
s := '';
tmp := TYZXMLTag.Create(nil);
tmp.ParseXML(s, x);
a := tmp.ValueNames;
for i := 0 to Length(a) - 1 do
Header[a[i]] := tmp.Values[a[i]];
tmp.Destroy;
x := x2 + 2;
end;
Result := True;
repeat
N := whatnext(AData, X, x1);
case N of
xmlOpenTag: Result := Result and AddChild.ParseXML(AData, X);
xmlIdentifier:
begin
if Text <> '' then
Text := Text + ' ';
Text := Text + parsestring(AData, X, True);
end;
else
Parsestring(AData, X);
end;
until skipspaces(adata, x) = #0;
//  if not result then ShowMessage('Error Parsing: '+inttostr(X));
end;

function TYZXMLParser.Generate(var AData: string): Boolean;
var
i: Integer;
a: strarr;
begin
Header['Date'] := DateTimeToStr(now);
a := header.Keys;

AData := 'for i := 0 to Length(a) - 1 do
AData := AData + ' ' + a[i] + '="' + Header[a[i]] + '"';

AData := AData + '?>'#13#10 + Text;
Result := True;
for i := 0 to Length(children) - 1 do
begin
Result := Result and children[i].generatexml(AData);
end;
end;

//==============================================================================

// procedures of THash class

//==============================================================================

{THASH CLASS}

procedure THash.Clear;
begin
SetLength(Arr, 0);
end;

constructor THash.Create;
begin
inherited;
Clear;
end;

//------------------------------------------------------------------------------

destructor THash.Destroy;
begin
Clear;
inherited;
end;

//------------------------------------------------------------------------------

function THash.GetCount: Integer;
begin
Result := Length(Arr);
end;

//------------------------------------------------------------------------------

function THash.Getempty: Boolean;
begin
Result := Length(Arr) = 0;
end;

function THash.GetKeys: StrArr;
var
i: Integer;
begin
SetLength(Result, Length(arr));
for i := 0 to Length(Result) - 1 do
Result[i] := arr[i].Key;
end;

//------------------------------------------------------------------------------

function THash.GetValue(Key: string): string;
var
i: Integer;
r: Boolean;
begin
Result := '';
i := 0;
r := False;
while (i < Length(Arr)) and (not r) do
begin
if AnsiUpperCase(arr[i].key) = AnsiUpperCase(Key) then
begin
Result := Arr[i].Value;
r := True;
end;
i := i + 1;
end;
end;

//------------------------------------------------------------------------------

function THash.GetValues: StrArr;
var
i: Integer;
begin
SetLength(Result, Length(arr));
for i := 0 to Length(Result) - 1 do
Result[i] := arr[i].Value;
end;

//------------------------------------------------------------------------------

procedure THash.SetValue(Key: string; const VValue: string);
var
i, j: Integer;
r: Boolean;
E: THashElementArr;
begin
if VValue <> '' then
begin
i := 0;
r := False;
while (i < Length(Arr)) and not r do
begin
if AnsiUpperCase(arr[i].key) = AnsiUpperCase(Key) then
begin
Arr[i].Value := VValue;
r := True;
end;
i := i + 1;
end;
if not r then
begin
SetLength(Arr, Length(arr) + 1);
arr[Length(arr) - 1].Key := Key;
arr[Length(arr) - 1].Value := Vvalue;
end;
end;

SetLength(E, Length(Arr));
for i := 0 to Length(arr) - 1 do
E[i] := Arr[i];
SetLength(arr, 0);
for i := 0 to Length(E) - 1 do
if (E[i].Key <> '') and (E[i].Value <> '') then
begin
j := Length(arr);
setlength(arr, j + 1);
arr[j] := E[i];
end;
end;

end.


<< Back to main page