{ This code has been copied from the Lazarus unit LazUTF8 and is distributed
  under the modified LGPL. See the file COPYING.modifiedLGPL.txt, included in
  the Lazarus distribution, for details about the license. }
{ Modified for Delphi compatibility }

// FindInvalidUTF8Codepoint
//
// Scans Count bytes starting at p and reports where the first byte sequence
// that is not acceptable UTF-8 begins.
//
// Parameters:
//   p             - pointer to the first byte to check; nil is treated as
//                   valid input (the function returns -1).
//   Count         - number of bytes available at p.
//   StopOnNonUTF8 - when True, every byte that cannot start a UTF-8 sequence
//                   (stray continuation bytes $80..$BF and lead bytes
//                   $F8..$FF) is reported. When False, those bytes are
//                   skipped as single-byte characters.
//
// Returns:
//   -1 when no invalid sequence was found, otherwise the zero-based byte
//   offset of the first offending lead byte.
//
// Always rejected, regardless of StopOnNonUTF8:
//   * lead bytes $C0 and $C1 (overlong 2-byte encodings),
//   * overlong 3-byte ($E0 followed by <= $9F) and 4-byte ($F0 followed by
//     <= $8F) encodings,
//   * code points above U+10FFFF ($F4 followed by > $8F, or lead $F5..$F7),
//   * multi-byte sequences that are truncated or have a non-continuation
//     byte where a continuation byte (%10xxxxxx) is required.
//
// NOTE(review): the comparison-heavy middle of this routine was restored
// after the '<' ... '>' spans had been stripped from the stored copy; confirm
// against the upstream LazUTF8 source.
function FindInvalidUTF8Codepoint(p: PAnsiChar; Count: integer; StopOnNonUTF8: Boolean): integer;
// return -1 if ok
var
  CharLen: Integer;
  c: Byte;
begin
  if (p<>nil) then begin
    Result:=0;
    while Result<Count do begin
      c:=ord(p^);
      if c<$80 {%10000000} then begin
        // regular single byte ASCII character
        CharLen:=1;
      end
      else if c<=$C1 {%11000001} then begin
        // single byte character, between valid UTF-8 encodings
        // $C0 and $C1 map 2 bytes to #0..#127, which is invalid and used
        // for XSS attacks, so they are rejected unconditionally
        if StopOnNonUTF8 or (c>=192) then
          exit;
        CharLen:=1;
      end
      else if c<=$DF {%11011111} then begin
        // could be 2 byte character (%110xxxxx %10xxxxxx)
        if (Result<Count-1)
        and ((ord(p[1]) and $C0 {%11000000}) = $80 {%10000000}) then
          CharLen:=2
        else
          exit; // missing following bytes
      end
      else if c<=$EF {%11101111} then begin
        // could be 3 byte character (%1110xxxx %10xxxxxx %10xxxxxx)
        if (Result<Count-2)
        and ((ord(p[1]) and $C0 {%11000000}) = $80 {%10000000})
        and ((ord(p[2]) and $C0 {%11000000}) = $80 {%10000000}) then begin
          if (c=$E0 {%11100000}) and (ord(p[1])<=$9F {%10011111}) then
            exit; // XSS attack: 3 bytes are mapped to the 1 or 2 byte codes
          CharLen:=3;
        end else
          exit; // missing following bytes
      end
      else if c<=$F7 {%11110111} then begin
        // could be 4 byte character (%11110xxx %10xxxxxx %10xxxxxx %10xxxxxx)
        if (Result<Count-3)
        and ((ord(p[1]) and $C0 {%11000000}) = $80 {%10000000})
        and ((ord(p[2]) and $C0 {%11000000}) = $80 {%10000000})
        and ((ord(p[3]) and $C0 {%11000000}) = $80 {%10000000}) then begin
          if (c=$F0 {%11110000}) and (ord(p[1])<=$8F {%10001111}) then
            exit; // XSS attack: 4 bytes are mapped to the 1-3 byte codes
          if (c>$F4{%11110100}) then
            exit; // out of range U+10FFFF
          if (c=$F4{%11110100}) and (ord(p[1])>$8F{%10001111}) then
            exit; // out of range U+10FFFF
          CharLen:=4;
        end else
          exit; // missing following bytes
      end
      else begin
        // $F8..$FF can never start a UTF-8 sequence
        if StopOnNonUTF8 then
          exit;
        CharLen:=1;
      end;
      inc(Result,CharLen);
      inc(p,CharLen);
      if Result>Count then begin
        // defensive: never report an offset past the end of the buffer
        dec(Result,CharLen);
        exit; // missing following bytes
      end;
    end;
  end;
  // ok
  Result:=-1;
end;