什么可能导致“目标多字节代码页中不存在Unicode字符的映射”?

时间:2016-02-29 20:18:11

标签: delphi unicode delphi-xe7

我有一个显示EEncodingError的EurekaLog错误报告。日志指向TFile.AppendAllText。我打电话TFile.AppendAllText是我的这个程序:

procedure WriteToFile(CONST FileName: string; CONST uString: string; CONST WriteOp: WriteOpperation; ForceFolder: Boolean= FALSE);     // Works with UNC paths
begin
 if NOT ForceFolder
 OR (ForceFolder AND ForceDirectoriesMsg(ExtractFilePath(FileName))) then
   if WriteOp= (woOverwrite)
   then IOUtils.TFile.WriteAllText (FileName, uString)
   else IOUtils.TFile.AppendAllText(FileName, uString);
end;

这是来自EurekaLog的信息。

enter image description here

enter image description here

是什么导致这种情况发生?

1 个答案:

答案 0 :(得分:13)

此程序会重现您报告的错误:

{$APPTYPE CONSOLE}

uses
  System.SysUtils, System.IOUtils;

var
  FileName: string;

begin
  try
    FileName := TPath.GetTempFileName;
    TFile.WriteAllText(FileName, 'é', TEncoding.ANSI);
    TFile.AppendAllText(FileName, 'é');
  except
    on E: Exception do
      Writeln(E.ClassName, ': ', E.Message);
  end;
end.

这里我把原始文件写成ANSI。然后调用AppendAllText,它将尝试写为UTF-8。会发生什么是我们最终在这个功能:

class procedure TFile.AppendAllText(const Path, Contents: string);
var
  LFileStream: TFileStream;
  LFileEncoding: TEncoding; // encoding of the file
  Buff: TBytes;
  Preamble: TBytes;
  UTFStr: TBytes;
  UTF8Str: TBytes;
begin
  CheckAppendAllTextParameters(Path, nil, False);

  LFileStream := nil;
  try
    try
      LFileStream := DoCreateOpenFile(Path);
      // detect the file encoding
      LFileEncoding := GetEncoding(LFileStream);

      // file is written is ASCII (default ANSI code page)
      if LFileEncoding = TEncoding.ANSI then
      begin
        // Contents can be represented as ASCII;
        // append the contents in ASCII

        UTFStr := TEncoding.ANSI.GetBytes(Contents);
        UTF8Str := TEncoding.UTF8.GetBytes(Contents);

        if TEncoding.UTF8.GetString(UTFStr) = TEncoding.UTF8.GetString(UTF8Str) then
        begin
          LFileStream.Seek(0, TSeekOrigin.soEnd);
          Buff := TEncoding.ANSI.GetBytes(Contents);
        end
        // Contents can be represented only in UTF-8;
        // convert file and Contents encodings to UTF-8
        else
        begin
          // convert file contents to UTF-8
          LFileStream.Seek(0, TSeekOrigin.soBeginning);
          SetLength(Buff, LFileStream.Size);
          LFileStream.ReadBuffer(Buff, Length(Buff));
          Buff := TEncoding.Convert(LFileEncoding, TEncoding.UTF8, Buff);

          // prepare the stream to rewrite the converted file contents
          LFileStream.Size := Length(Buff);
          LFileStream.Seek(0, TSeekOrigin.soBeginning);
          Preamble := TEncoding.UTF8.GetPreamble;
          LFileStream.WriteBuffer(Preamble, Length(Preamble));
          LFileStream.WriteBuffer(Buff, Length(Buff));

          // convert Contents in UTF-8
          Buff := TEncoding.UTF8.GetBytes(Contents);
        end;
      end
      // file is written either in UTF-8 or Unicode (BE or LE);
      // append Contents encoded in UTF-8 to the file
      else
      begin
        LFileStream.Seek(0, TSeekOrigin.soEnd);
        Buff := TEncoding.UTF8.GetBytes(Contents);
      end;

      // write Contents to the stream
      LFileStream.WriteBuffer(Buff, Length(Buff));
    except
      on E: EFileStreamError do
        raise EInOutError.Create(E.Message);
    end;
  finally
    LFileStream.Free;
  end;
end;

错误源于这一行:

if TEncoding.UTF8.GetString(UTFStr) = TEncoding.UTF8.GetString(UTF8Str) then

问题在于UTFStr实际上并非有效UTF-8。因此TEncoding.UTF8.GetString(UTFStr)会引发异常。

这是TFile.AppendAllBytes中的缺陷。鉴于它完全清楚UTFStr ANSI已编码,因此调用TEncoding.UTF8.GetString毫无意义。

你应该向Embarcadero提交一份关于这个缺陷的错误报告,这个缺陷仍然存在于Delphi 10 Seattle中。在此期间,您不应使用TFile.AppendAllBytes