writeln的明显副作用(“:width”说明符导致输出中出现问号)

时间:2014-03-09 19:13:01

标签: delphi io console-application delphi-xe2 side-effects

我有以下代码(RAD Studio XE2,Windows 7 x64):

program letters;

{$APPTYPE CONSOLE}

{$DEFINE BOO}

const
  ENGLISH_ALPHABET = 'abcdefghijklmnopqrstuvwxyz';

begin
{$IFDEF BOO}
  writeln;
{$ENDIF}
  write(ENGLISH_ALPHABET[1]:3);

  readln;
end.

{$DEFINE BOO}指令启用关闭时,我有以下(预期)输出(为了便于阅读,空格被点替换)

..a

当指令在时,我有以下(意外)输出:

// empty line here
?..a

而非预期

// empty line here
..a

当我将const ENGLISH_ALPHABET更改为const ENGLISH_ALPHABET: AnsiString时,预期输出会打印出没有问题的字符。删除:3格式或将其更改为:1后,没有问号。当输出重定向到文件时(通过AssignFile(Output, 'boo.log')或从命令行),再没有问号。

这种行为的正确解释是什么?

1 个答案:

答案 0 :(得分:9)

这是RTL中一个相当奇怪的错误。对write的调用解析为对_WriteWChar的调用。这个功能实现如下:

function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
  if width <= 1 then
    result := _Write0WChar(t, c)
  else
  begin
    if t.UTF16Buffer[0] <> #0 then
    begin
      _Write0WChar(t, '?');
      t.UTF16Buffer[0] := #0;
    end;

    _WriteSpaces(t, width - 1);
    Result := _Write0WChar(t, c);
  end;
end;

您看到的?由上面的代码发出。

那么,为什么会发生这种情况。我能构建的最简单的SSCCE是:

{$APPTYPE CONSOLE}
const
  ENGLISH_ALPHABET = 'abcdefghijklmnopqrstuvwxyz';
begin
  writeln;
  write(ENGLISH_ALPHABET[1]:3);
end.

所以,你的第一个电话writeln并解决了这个问题:

function _WriteLn(var t: TTextRec): Pointer;
begin
  if (t.Flags and tfCRLF) <> 0 then
    _Write0Char(t, _AnsiChr(cCR));
  Result := _Write0Char(t, _AnsiChr(cLF));
  _Flush(t);
end;

在这里,您将单个字符cLF,ASCII字符10,换行符推送到输出文本记录中。这导致t.MBCSBuffer被提供cLF个字符。该字符保留在缓冲区中,因为System._Write0Char.WriteUnicodeFromMBCSBuffer执行此操作:

t.MBCSLength := 0;
t.MBCSBufPos := 0;

但是当_WriteWChar执行时,它会不加选择地查看t.UTF16Buffer。这在TTextRec中声明如下:

type
  TTextRec = packed record 
    ....
    MBCSLength: ShortInt;
    MBCSBufPos: Byte;
    case Integer of
      0: (MBCSBuffer: array[0..5] of _AnsiChr);
      1: (UTF16Buffer: array[0..2] of WideChar);
  end;

因此,MBCSBufferUTF16Buffer共享相同的存储空间。

错误是_WriteWChar在没有首先检查缓冲区长度的情况下不应该查看t.UTF16Buffer的内容。由于TTextRec没有UTF16Length,因此无法立即明白如何实现。相反,如果t.UTF16Buffer包含有意义的内容,则约定是其长度由-t.MBCSLength给出!

所以_WriteWChar应该是:

function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
begin
  if width <= 1 then
    result := _Write0WChar(t, c)
  else
  begin
    if (t.MBCSLength < 0) and (t.UTF16Buffer[0] <> #0) then
    begin
      _Write0WChar(t, '?');
      t.UTF16Buffer[0] := #0;
    end;

    _WriteSpaces(t, width - 1);
    Result := _Write0WChar(t, c);
  end;
end;

这是一个修复_WriteWChar的相当卑鄙的黑客攻击。请注意,我无法获得System._WriteSpaces的地址以便能够调用它。如果你不顾一切地解决这个问题,那就可以做到。

{$APPTYPE CONSOLE}

uses
  Windows;

procedure PatchCode(Address: Pointer; const NewCode; Size: Integer);
var
  OldProtect: DWORD;
begin
  if VirtualProtect(Address, Size, PAGE_EXECUTE_READWRITE, OldProtect) then
  begin
    Move(NewCode, Address^, Size);
    FlushInstructionCache(GetCurrentProcess, Address, Size);
    VirtualProtect(Address, Size, OldProtect, @OldProtect);
  end;
end;

type
  PInstruction = ^TInstruction;
  TInstruction = packed record
    Opcode: Byte;
    Offset: Integer;
  end;

procedure RedirectProcedure(OldAddress, NewAddress: Pointer);
var
  NewCode: TInstruction;
begin
  NewCode.Opcode := $E9;//jump relative
  NewCode.Offset := NativeInt(NewAddress)-NativeInt(OldAddress)-SizeOf(NewCode);
  PatchCode(OldAddress, NewCode, SizeOf(NewCode));
end;

var
  _Write0WChar: function(var t: TTextRec; c: WideChar): Pointer;

function _Write0WCharAddress: Pointer;
asm
  MOV     EAX, offset System.@Write0WChar
end;

function _WriteWCharAddress: Pointer;
asm
  MOV     EAX, offset System.@WriteWChar
end;

function _WriteWChar(var t: TTextRec; c: WideChar; width: Integer): Pointer;
var
  i: Integer;
begin
  if width <= 1 then
    result := _Write0WChar(t, c)
  else
  begin
    if (t.MBCSLength < 0) and (t.UTF16Buffer[0] <> #0) then
    begin
      _Write0WChar(t, '?');
      t.UTF16Buffer[0] := #0;
    end;

    for i := 1 to width - 1 do
      _Write0WChar(t, ' ');
    Result := _Write0WChar(t, c);
  end;
end;

const
  ENGLISH_ALPHABET = 'abcdefghijklmnopqrstuvwxyz';

begin
  @_Write0WChar := _Write0WCharAddress;
  RedirectProcedure(_WriteWCharAddress, @_WriteWChar);

  writeln;
  write(ENGLISH_ALPHABET[1]:3);
end.

我提交了QC#123157