我正在使用包含
的windows-1252字符串拉丁大写字母反向E(unicode U + 018E)
当我将字符串转换为UTF-8时,字符串会在行首添加BOM "þÿ"
(UTF-16 BE,十六进制表示:FE FF)。
如何在没有BOM的情况下将其转换为UTF-8?
这是我的代码。
当我将其转换为ASCII时,我得到"?"符号。 如果我转换为UTF-8,我会得到" BOM"在该行中的每个字符后添加空格的字符问题。
这是我的输出。我正在尝试将文本文件转换为pdf文件。
Windows-1252 to ASCII和Windows-1252 to UTF-8
而且如果我想复制文本内容(相同的间距和结构),有什么方法可以做到吗?
private OutputStream getStreamFromTextFile(byte[] fileInput) throws IOException, COSVisitorException
{
//Made the changes for DS text alignment.
OutputStream outputStream = new ByteArrayOutputStream();
PDDocument doc = new PDDocument();
PDRectangle rectangle_A4 = new PDRectangle(598f, 824f);
PDPage page = new PDPage(rectangle_A4);
doc.addPage(page);
PDPageContentStream stream = new PDPageContentStream(doc,page);
PDFont pdfFont = PDType1Font.TIMES_ROMAN;
float fontSize = 14;
float leading = 1.5f * fontSize;
PDRectangle mediabox = page.getMediaBox();
float margin = 40;
float width = mediabox.getWidth() - 2*margin;
float startX = mediabox.getLowerLeftX() + margin;
float startY = mediabox.getUpperRightY() - margin;
/*
String data = new String(fileInput,Charset.forName("windows-1252"));
byte[] Ascii_Bytes = data.getBytes(Charset.forName("US-ASCII"));
String text = new String(Ascii_Bytes,"US-ASCII");
*/
String data = new String(fileInput,Charset.forName("windows-1252"));
byte[] UTF_8_Bytes = data.getBytes(Charset.forName("UTF-8"));
String text = new String(UTF_8_Bytes,"UTF-8");
/*
String text = new String(fileInput,Charset.forName("windows-1252"));
*/
List<String> lines = new ArrayList<String>();
int lastSpace = -1;
while (text.length() > 0)
{
int spaceIndex = text.indexOf(' ', lastSpace + 1);
if (spaceIndex < 0)
spaceIndex = text.length();
String subString = text.substring(0, spaceIndex);
float size = fontSize * pdfFont.getStringWidth(subString) / 1000;
System.out.printf("'%s' - %f of %f\n", subString, size, width);
if (size > width-(margin-10))
{
if (lastSpace < 0)
lastSpace = spaceIndex;
subString = text.substring(0, lastSpace);
lines.add(subString);
text = text.substring(lastSpace).trim();
System.out.printf("'%s' is line\n", subString);
lastSpace = -1;
}
else if (spaceIndex == text.length())
{
lines.add(text);
System.out.printf("'%s' is line\n", text);
text = "";
}
else
{
lastSpace = spaceIndex;
}
}
stream.beginText();
stream.setFont(pdfFont, fontSize);
stream.moveTextPositionByAmount(startX, startY);
float currentY = startY;
for (String line: lines)
{
currentY -= leading;
if (currentY<=margin)
{
stream.endText();
stream.close();
PDRectangle new_rectangle_A4 = new PDRectangle(598f, 824f);
PDPage new_page = new PDPage(new_rectangle_A4);
doc.addPage(new_page);
stream = new PDPageContentStream(doc,new_page);
stream.beginText();
stream.setFont(pdfFont, fontSize);
stream.moveTextPositionByAmount(startX, startY);
currentY = startY;
}
stream.drawString(line);
stream.moveTextPositionByAmount(0, -leading);
}
stream.endText();
stream.close();
doc.save(outputStream);
doc.close();
return outputStream;
}