How to determine whether a string contains binary data or normal readable text (Views: 29)
Problem/Question/Abstract:

How to determine whether a string contains binary data or normal readable text

Answer:

Solve 1:

Well, actually there is not really a difference between binary and not. There is just a common "agreement" that everything below ASCII code 32 is a "binary" character.

{ Returns True when st contains no character below #32; an empty string is
  therefore reported as readable.
  Note: tab (#9), LF (#10) and CR (#13) are also below #32, so multi-line or
  tab-separated text is reported as NOT readable by this check. }
function IsReadable(st: string): Boolean;
var
  i: Integer;
begin
  Result := false;
  for i := 1 to Length(st) do
    if st[i] < #32 then
      Exit; // first control character found: Result stays false
  Result := true;
end;

Solve 2:

{ Classifies the file at Path by sampling its first SizeOf(ReadBuffer) bytes
  and stores one of the VERY_BASIC_FILETYPE_* constants in VeryBasicFileType:

    - VERY_BASIC_FILETYPE_UNKNOWN      the file does not exist
    - VERY_BASIC_FILETYPE_EMPTY        the file size is below 1 byte
    - VERY_BASIC_FILETYPE_TEXT_UNICODE many NUL bytes and a UTF-16 BOM
    - VERY_BASIC_FILETYPE_BINARY       many NUL bytes without BOM, or many
                                       control/high bytes with few line
                                       breaks and few spaces
    - VERY_BASIC_FILETYPE_TEXT_UTF8    UTF-8 BOM plus enough line breaks
    - VERY_BASIC_FILETYPE_TEXT         everything else

  Parameters:
    Path              - file to inspect.
    VeryBasicFileType - receives the classification result.

  Exceptions raised by TFileStream.Create are not caught here and propagate
  to the caller; the stream itself is always freed.

  Fixes compared to the previously published version:
    - the space character is $20 (the old code tested $21, i.e. '!');
    - the last byte read is now classified too (loop ran to DidRead - 2);
    - the DidRead >= 4 guard now applies to both UTF-16 BOM variants
      ("and" binds tighter than "or" in Pascal);
    - the UTF-8 BOM bytes are only inspected when at least 3 bytes were read;
    - counters are Integer instead of Word, so a sample buffer larger than
      65535 bytes no longer truncates the byte count. }
procedure GetVeryBasicFileType(const Path: string; var VeryBasicFileType: LongWord); overload;
const
  BYTE_LF    = $0A;
  BYTE_CR    = $0D;
  BYTE_SPACE = $20;
var
  Fs: TFileStream;
  ExtCharCount, NothingCount, SpaceCount, LfCount, i, DidRead: Integer;
  LastB, B: Byte;
  HasUtf16Bom, HasUtf8Bom: Boolean;
  ReadBuffer: array[0..MAX_FILE_DETECT_BUFFER] of Byte;
begin
  VeryBasicFileType := VERY_BASIC_FILETYPE_UNKNOWN;
  if not FileExists(Path) then
    Exit;

  Fs := TFileStream.Create(Path, fmOpenRead);
  try
    if Fs.Size < 1 then
    begin
      VeryBasicFileType := VERY_BASIC_FILETYPE_EMPTY;
      Exit; // the finally block below still frees the stream
    end;

    LfCount := 0;
    ExtCharCount := 0;
    SpaceCount := 0;
    NothingCount := 0;
    LastB := 0;

    Fs.Seek(0, soFromBeginning);
    DidRead := Fs.Read(ReadBuffer, SizeOf(ReadBuffer));

    // Build a histogram of the sampled bytes.
    for i := 0 to DidRead - 1 do
    begin
      B := ReadBuffer[i];
      if B = 0 then
        Inc(NothingCount)
      else if (B = BYTE_LF) or (B = BYTE_CR) then
        Inc(LfCount)
      else if (B < 32) or (B > 127) then
        Inc(ExtCharCount)
      else if (B = BYTE_SPACE) and (LastB <> BYTE_SPACE) then
        Inc(SpaceCount); // a run of consecutive spaces counts once
      LastB := B;
    end;

    // More than roughly 3% NUL bytes: either UTF-16 text or binary data.
    if NothingCount > (DidRead div 30) then
    begin
      HasUtf16Bom := (DidRead >= 4) and
        (((ReadBuffer[0] = $FF) and (ReadBuffer[1] = $FE)) or
         ((ReadBuffer[0] = $FE) and (ReadBuffer[1] = $FF)));
      if HasUtf16Bom then
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT_UNICODE
      else
        VeryBasicFileType := VERY_BASIC_FILETYPE_BINARY;
      Exit;
    end;

    if ExtCharCount > (DidRead div 3) then
    begin
      // Many control/high bytes: still accept as text when it contains
      // a plausible amount of line breaks or spaces.
      if (LfCount >= (DidRead div 60)) or (SpaceCount > (DidRead div 6)) then
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT
      else
        VeryBasicFileType := VERY_BASIC_FILETYPE_BINARY;
    end
    else
    begin
      // Only look at the BOM bytes when they were actually read.
      HasUtf8Bom := (DidRead >= 3) and
        (ReadBuffer[0] = $EF) and (ReadBuffer[1] = $BB) and (ReadBuffer[2] = $BF);
      if HasUtf8Bom and (LfCount > (DidRead div 60)) then
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT_UTF8
      else
        VeryBasicFileType := VERY_BASIC_FILETYPE_TEXT;
    end;
  finally
    Fs.Free;
  end;
end;
|