How to parse a TRichEdit for domain names (Views: 30)
Problem/Question/Abstract: How can I parse a TRichEdit for domains ending in .com, .net, .org? Answer: Solve 1: Not extensively tested: procedure TForm1.Button1Click(Sender: TObject); const charsAllowedInDomain = ['a'..'z', '0'..'9', '.', '_']; {may be more} numExts = 4; domainExts: array[1..numExts] of Pchar = ('.com', '.net', '.org', '.gov'); {lower case!} lens: array[1..numExts] of Integer = (4, 4, 4, 4); var S: string; pStartString, pScan, pStartDomain, pEndDomain: Pchar; domain: string; i: Integer; begin S := AnsiLowerCase(richedit1.text); pStartString := PChar(S); pScan := pStartString; while pScan^ <> #0 do begin if pScan^ = '.' then begin for i := Low(domainExts) to High(domainExts) do if StrLComp(pScan, domainExts[i], lens[i]) = 0 then begin {we have a candidate} pStartDomain := pScan; pEndDomain := pScan + lens[i]; if not (pEndDomain^ in charsAllowedInDomain) then begin while (pStartDomain > pStartString) and (pStartDomain[-1] in charsAllowedInDomain) do Dec(pStartDomain); SetString(domain, pStartDomain, pEndDomain - pStartDomain); listbox1.items.add(domain); pScan := pEndDomain - 1; break; end; end; end; Inc(pScan); end; end; Solve 2: { ... } type {declared in richedit.pas D3} TCharRange = record cpMin: Longint; cpMax: LongInt; end; TFindTextExA = record {declared in richedit.pas D3} chrg: TCharRange; lpstrText: PAnsiChar; chrgText: TCharRange; end; procedure REFindDomain(RE: TRichEdit; const Target: string; Strs: TStrings); const {maybe more than these?} ValidChars: set of char = ['a'..'z', 'A'..'Z', '0'..'9', '.', '/', ':', '_', '-']; var ftx: TFindTextExA; flags: longint; charpos: longint; s: string; begin if (Target = '') then exit; {nothing to look for} {searches all of the RichEdit} ftx.chrg.cpMin := 0; ftx.chrg.cpMax := -1; ftx.lpstrText := PChar(Target); ftx.chrgText.cpMin := 0; ftx.chrgText.cpMax := 0; flags := 0; // EM_FINDTEXTEX = WM_USER + 79; {declared in richedit.pas D3} while SendMessage(RE.Handle, WM_USER + 79, flags, longint(@ftx)) > -1 do begin RE.SelStart := ftx.chrgText.cpMin; {found at position} RE.SelLength := Length(Target); {get the line} if ftx.chrgText.cpMax >= 255 then s := Copy(RE.Lines.Text, ftx.chrgText.cpMax - 254, 255) else s := Copy(RE.Lines.Text, 1, ftx.chrgText.cpMax); {need to find start of domain name} charpos := Length(s); while (charpos > 1) and (s[charpos] in ValidChars) do Dec(charpos); if not (s[charpos] in ValidChars) then Inc(charpos); Strs.Add(Copy(s, charpos, Length(s))); ftx.chrg.cpMin := ftx.chrgText.cpMin + 1; {reset to found at pos} end; end; {ListBox1 contains 3 lines: '.com' '.net' '.org', ListBox2 receives the results} procedure TForm1.Button1Click(Sender: TObject); var i: integer; begin if ListBox1.Items.Count > 0 then begin ListBox2.Clear; for i := 0 to ListBox1.Items.Count - 1 do begin REFindDomain(RichEdit1, ListBox1.Items[i], ListBox2.Items); end; Label1.Caption := IntToStr(ListBox2.Items.Count); end; end; |