How to find a string in a file (Views: 29)
Problem/Question/Abstract: I have an array of char called FBuffer1. Let's say: StrCopy(FBuffer1,'Test'). I also have a file, let's say File1.exe. I would like to find a very quick way to be able to localize the string "test" in the file1.exe Answer: Solve 1: One way is to remove the file access problems. Load the whole file into a TMemoryStream, then search the stream. Example: { ... } var tmem: TMemoryStream; buf: array[1..4] of Char; begin zeromemory(@buf, 4); tmem := TMemoryStream.Create; tmem.loadfromfile('test1.exe'); tmem.position := 0; while tmem.position <> tmem.size do begin buf[1] := buf[2]; buf[2] := buf[3]; buf[3] := buf[4]; tmem.read(buf[4], 1); if compare(buf, 'hello') then Memo1.Lines.Add('match found at position ' + Inttostr(tmem.position)); end; tmem.destroy; end; Solve 2: I was working on just that some time ago. Here is my project file with some alternative functions and a time test. Just paste the following listing into a text file, rename the file to Project1.dpr, open the file in Delphi and run it. {$APPTYPE CONSOLE} program Project1; uses Windows, SysUtils; function ScanString(SourceStart, SourceEnd, Search: PChar; CaseSensitive: Boolean): PChar; var SourcePtr: PChar; SourceChr: Char; SearchPos: DWord; SearchPtr: PChar; begin Result := nil; if SourceStart > SourceEnd then Exit; if not CaseSensitive then CharUpperBuff(Search, Length(Search)); SourcePtr := SourceStart; SearchPos := 0; SearchPtr := Search; while SourcePtr <= SourceEnd do begin SourceChr := SourcePtr^; if not CaseSensitive then CharUpperBuff(@SourceChr, 1); if SourceChr = SearchPtr^ then begin Inc(SearchPtr); if SearchPtr^ = #0 then begin Result := SourcePtr - SearchPos; Break; end; Inc(SearchPos); end else if SearchPos > 0 then begin SearchPos := 0; SearchPtr := Search; end; Inc(SourcePtr); end; end; function ScanStringNew(SourceStart, SourceEnd, SearchStr: PChar; CaseSensitive: Boolean): PChar; var SourcePtr: PChar; ScanLen: DWord; ScanPos: DWord; ScanStr: PChar; ScanPtr: PChar; ScanUppStr: PChar; ScanUppPtr: PChar; ScanLowStr: PChar; ScanLowPtr: PChar; begin Result := nil; if SourceStart > SourceEnd then Exit; ScanLen := Length(SearchStr); if not CaseSensitive then begin GetMem(ScanUppStr, ScanLen); CopyMemory(ScanUppStr, SearchStr, ScanLen); CharUpperBuff(ScanUppStr, ScanLen); GetMem(ScanLowStr, ScanLen); CopyMemory(ScanLowStr, SearchStr, ScanLen); CharLowerBuff(ScanLowStr, ScanLen); end else begin ScanUppStr := SearchStr; ScanLowStr := SearchStr; end; ScanPos := 0; ScanUppPtr := ScanUppStr; ScanLowPtr := ScanLowStr; SourcePtr := SourceStart; ScanPtr := ScanStr; while SourcePtr <= SourceEnd do begin if (SourcePtr^ = ScanUppPtr^) or (SourcePtr^ = ScanLowPtr^) then begin Inc(ScanPos); if ScanPos = ScanLen then begin Result := SourcePtr - ScanPos + 1; Break; end; Inc(ScanUppPtr); Inc(ScanLowPtr); end else if ScanPos > 0 then begin ScanPos := 0; ScanUppPtr := ScanUppStr; ScanLowPtr := ScanLowStr; end; Inc(SourcePtr); end; if not CaseSensitive then begin FreeMem(ScanUppStr, ScanLen); FreeMem(ScanLowStr, ScanLen); end; end; function ScanStringAsm(SourceStart, SourceEnd, SearchStr: PChar; CaseSensitive: Boolean): PChar; var ScanLen: DWord; ScanPos: DWord; ScanStr: PChar; ScanPtr: PChar; ScanUppStr: PChar; ScanUppPtr: PChar; ScanLowStr: PChar; ScanLowPtr: PChar; begin if SourceStart > SourceEnd then Exit; ScanLen := Length(SearchStr); if not CaseSensitive then begin GetMem(ScanUppStr, ScanLen); CopyMemory(ScanUppStr, SearchStr, ScanLen); CharUpperBuff(ScanUppStr, ScanLen); GetMem(ScanLowStr, ScanLen); CopyMemory(ScanLowStr, SearchStr, ScanLen); CharLowerBuff(ScanLowStr, ScanLen); end else begin ScanUppStr := SearchStr; ScanLowStr := SearchStr; end; GetMem(ScanStr, ScanLen * 2 + 2); ScanPos := ScanLen; ScanPtr := ScanStr; ScanUppPtr := ScanUppStr; ScanLowPtr := ScanLowStr; while ScanPos > 0 do begin ScanPtr^ := ScanUppPtr^; Inc(ScanPtr); Inc(ScanUppPtr); ScanPtr^ := ScanLowPtr^; Inc(ScanPtr); Inc(ScanLowPtr); Dec(ScanPos); end; ScanPtr^ := #0; asm {Register use: EDI - pointer to source char ESI - pointer to par of scan chars AL - current source char EBX - match length counter ECX - source length counter DX - current par of scan chars} end; if not CaseSensitive then begin FreeMem(ScanUppStr, ScanLen); FreeMem(ScanLowStr, ScanLen); end; FreeMem(ScanStr, ScanLen * 2 + 2); end; end; if not CaseSensitive then begin FreeMem(ScanUppStr, ScanLen); FreeMem(ScanLowStr, ScanLen); end; FreeMem(ScanStr, ScanLen * 2 + 2); end; {Preserve registers:} PUSH EBX {Preserve registers EBX, EDI, ESI:} PUSH EDI PUSH ESI {Initialize registers:} MOV EDI, SourceStart {Move addr SourceStart to EDI} MOV ECX, SourceEnd {Calculate source length in ECX:} SUB ECX, EDI INC ECX MOV ESI, ScanStr {Move addr ScanStr to ESI} MOV DX, WORD[ESI] {Move first par of scan chars to DX} xor EBX, EBX {Set EBX (match counter) to 0} @01: {Main test loop:} MOV AL, BYTE[EDI] {Move current source char to AL} INC EDI {Inc EDI to point to next source char} CMP AL, DL {Compare AL with scan char in DL (uppcase)} JE@10 {Jump to @10 if equal (match)} CMP AL, DH {Compare AL with scan char in DH (lowcase)} JE@10 {Jump to @10 if equal (match)} TEST EBX, EBX {Test EBX (match counter)} JZ@02 {Jump to @02 if zero (i.e. first scan char)} SUB ESI, EBX {Move ESI back to start of scan string:} SUB ESI, EBX MOV DX, WORD[ESI] {Move first par of scan chars to DX} xor EBX, EBX {Set EBX to 0} @02: {Next loop:} DEC ECX {Dec ECX (source length counter)} JNZ@01 {Jump back to @01 if not zero} MOV Result, 0 {Move nil to Result (match not found)} JMP@99 {Jump to @99} @10: {Char match found:} INC EBX {Inc EBX (match length counter):} ADD ESI, 2 {Move ESI to next par of scan chars:} MOV DX, WORD[ESI] {Move this par of scan chars to DX} CMP DL, 0 {Compare char in DL with #0 (end of string)} JNE@02 {Jump to @02 if not equal (test next char)} {Match found:} SUB EDI, EBX {Move EDI back to first char in match} MOV Result, EDI {Move addr of match to Result} @99: {Restore registers:} POP ESI POP EDI POP EBX end; if not CaseSensitive then begin FreeMem(ScanUppStr, ScanLen); FreeMem(ScanLowStr, ScanLen); end; FreeMem(ScanStr, ScanLen * 2 + 2); end; procedure TimeTest2; var Time1: DWord; Time2: DWord; Search: string; TestName: string; TestFile: file; TestSize: DWord; TestBuff: PChar; TestScan: PChar; TestPtr: PChar; TestPos: Integer; HitCount: Integer; n, i, j: Integer; c: Char; Show: Boolean; begin n := 20; Show := false; Search := 'WINDOWS'; {TestBuff := PChar(Search); TestScan := TestBuff; c := TestScan^; Time1 := GetTickCount; for i := 1 to 10000000 do begin if TestBuff^ = c then begin end; end; Time2 := GetTickCount; WriteLn('Tickcount : ', Time2 - Time1); Exit;} TestName := 'c:\windows\help\getstart.chm'; AssignFile(TestFile, TestName); Reset(TestFile, 1); TestSize := FileSize(TestFile); GetMem(TestBuff, TestSize); BlockRead(TestFile, TestBuff^, TestSize); CloseFile(TestFile); WriteLn; WriteLn('Scaning for "', Search, '" ', n, ' times'); WriteLn('in file: ', TestName, ' size: ', TestSize, ' bytes'); HitCount := 0; Time1 := GetTickCount; for i := 1 to n do begin TestScan := TestBuff; repeat if TestScan <> TestBuff then Inc(TestScan, Length(Search)); TestScan := ScanString(TestScan, TestBuff + TestSize - 1, PChar(Search), false); if TestScan <> nil then begin Inc(HitCount); if Show then begin Write(HitCount, ' '); TestPtr := TestScan; for TestPos := 1 to Length(Search) do begin Write(TestPtr^); Inc(TestPtr); end; WriteLn; ReadLn; end; end; until TestScan = nil; end; Time2 := GetTickCount; WriteLn(' Tickcount ScanString : ', Time2 - Time1: 5, 'ms', ' hitcount:', HitCount); HitCount := 0; Time1 := GetTickCount; for i := 1 to n do begin TestScan := TestBuff; repeat if TestScan <> TestBuff then Inc(TestScan, Length(Search)); TestScan := ScanStringNew(TestScan, TestBuff + TestSize - 1, PChar(Search), false); if TestScan <> nil then begin Inc(HitCount); if Show then begin Write(HitCount, ' '); TestPtr := TestScan; for TestPos := 1 to Length(Search) do begin Write(TestPtr^); Inc(TestPtr); end; WriteLn; ReadLn; end; end; until TestScan = nil; end; Time2 := GetTickCount; WriteLn(' Tickcount ScanStringNew: ', Time2 - Time1: 5, 'ms', ' hitcount:', HitCount); HitCount := 0; Time1 := GetTickCount; for i := 1 to n do begin TestScan := TestBuff; repeat if TestScan <> TestBuff then Inc(TestScan, Length(Search)); TestScan := ScanStringAsm(TestScan, TestBuff + TestSize - 1, PChar(Search), false); if TestScan <> nil then begin Inc(HitCount); if Show then begin Write(HitCount, ' '); TestPtr := TestScan; for TestPos := 1 to Length(Search) do begin Write(TestPtr^); Inc(TestPtr); end; WriteLn; ReadLn; end; end; until TestScan = nil; end; Time2 := GetTickCount; WriteLn(' Tickcount ScanStringAsm: ', Time2 - Time1: 5, 'ms', ' hitcount:', HitCount); FreeMem(TestBuff, TestSize); end; begin TimeTest2; WriteLn; WriteLn('** press enter to close **'); ReadLn; end. Solve 3: function ScanFile(const filename: string; const forString: string; caseSensitive: Boolean): LongInt; { returns position of string in file or -1, if not found } const BufferSize = $8001; { 32K + 1 bytes } var pBuf, pEnd, pScan, pPos: Pchar; filesize: LongInt; bytesRemaining: LongInt; bytesToRead: Integer; F: file; SearchFor: Pchar; oldMode: Word; begin Result := -1; { assume failure } if (Length(forString) = 0) or (Length(filename) = 0) then Exit; SearchFor := nil; pBuf := nil; { open file as binary, 1 byte recordsize } AssignFile(F, filename); oldMode := FileMode; FileMode := 0; { read-only access } Reset(F, 1); FileMode := oldMode; try { allocate memory for buffer and pchar search string } SearchFor := StrAlloc(Length(forString) + 1); StrPCopy(SearchFor, forString); if not caseSensitive then { convert to upper case } AnsiUpper(SearchFor); GetMem(pBuf, BufferSize); filesize := System.Filesize(F); bytesRemaining := filesize; pPos := nil; while bytesRemaining > 0 do begin { calc how many bytes to read this round } if bytesRemaining >= BufferSize then bytesToRead := Pred(BufferSize) else bytesToRead := bytesRemaining; { read a buffer full and zero-terminate the buffer } BlockRead(F, pBuf^, bytesToRead, bytesToRead); pEnd := @pBuf[bytesToRead]; pEnd^ := #0; { scan the buffer. Problem: buffer may contain #0 chars! So we treat it as a concatenation of zero-terminated strings. } pScan := pBuf; while pScan < pEnd do begin if not caseSensitive then { convert to upper case } AnsiUpper(pScan); pPos := StrPos(pScan, SearchFor); { search for substring } if pPos <> nil then begin { Found it! } Result := FileSize - bytesRemaining + LongInt(pPos) - LongInt(pBuf); Break; end; pScan := StrEnd(pScan); Inc(pScan); end; if pPos <> nil then Break; bytesRemaining := bytesRemaining - bytesToRead; if bytesRemaining > 0 then begin { no luck in this buffers load. We need to handle the case of the search string spanning two chunks of file now. We simply go back a bit in the file and read from there, thus inspecting some characters twice } Seek(F, FilePos(F) - Length(forString)); bytesRemaining := bytesRemaining + Length(forString); end; end; finally CloseFile(F); if SearchFor <> nil then StrDispose(SearchFor); if pBuf <> nil then FreeMem(pBuf, BufferSize); end; end; Solve 4: One option is to just read the entire file into a single string. The old-fashioned way is to use BlockRead. You could also use a file stream. Once you have it in a single string you can use any normal string operations, even if there are embedded null bytes or CR/LF's. procedure TForm1.Button1Click(Sender: TObject); var s: string; f: file; p: integer; begin AssignFile(f, 'c:\winnt\system32\mspaint.exe'); FileMode := 0; Reset(f, 1); SetLength(s, FileSize(f)); BlockRead(f, s[1], FileSize(f)); CloseFile(f); p := pos('This program cannot be run in DOS mode', s); Label1.Caption := 'Found at : ' + IntToStr(p); end; |