我尝试使用wfstream读取UTF-16文件并存入wstring。但在转储内存之后,我发现它不是我想要的。例如,UTF-16文件中的“Chào”字符串是“FF FE 43 00 68 00 E0 00 6F 00”(使用十六进制编辑器查看)。读入后的wstring内容为:
[0]FF (BOM)
[1]FE (BOM)
[2]43
[3]00
[4]68
[5]E0
[6]00
[7]6F
[8]00
因此,使用fstream :: open,它只是逐字节读取并将其存储为wchar。但我真正想要的是一个UTF-16编码的wstring,所以wstring应该是:
[0]43
[1]68
[2]E0
[3]6F
那么,如何使用wfstream以正确的编码读取UTF-16文件?感谢阅读:D
答案 0 :(得分:-1)
也许您应该在阅读之前尝试更改编码:
// Locale whose codecvt facet decodes UTF-16 file bytes into wchar_t.
// consume_header makes the facet swallow a leading BOM; little_endian selects
// the byte order used by the "FF FE 43 00 ..." file when no BOM is present.
const std::locale AvailLocale
= std::locale(std::locale("Russian"), new std::codecvt_utf16<wchar_t, 0x10ffff, std::codecvt_mode(std::consume_header | std::little_endian)>());
wfstream myfile;
// A locale has no effect until it is imbued into the stream; do it before
// opening so that the very first read is already decoded with the facet.
myfile.imbue(AvailLocale);
// Open with std::ios::binary so newline translation cannot split 16-bit units.
myfile.open(...);
将语言“俄语”更改为您的计算机默认语言,它应该可以使用!
答案 1 :(得分:-1)
这是因为BOM必须以二进制形式写入/读取,而文本只是在文本模式下完成。
你可以用下面这样的代码关闭并重新打开文件,或者手动完成同样的操作。否则你可能不得不使用C++11或WinAPI。思路是:以二进制模式读/写BOM,然后以文本模式读/写文件内容。这样是可行的,我测试过。否则,您将不得不自行进行转换。
#include <iostream>
#include <vector>
#include <fstream>
/// Thin wrapper around std::basic_fstream that remembers the file path, so the
/// same file can be closed and reopened with a different open mode.
///
/// @tparam T       character type of the underlying stream
/// @tparam Traits  character traits, defaults to std::char_traits<T>
template<typename T, typename Traits = std::char_traits<T>>
class ModFStream
{
private:
    std::string filepath;                  // path used by every (re)open
    std::basic_fstream<T, Traits> stream;  // the wrapped stream
    std::ios_base::openmode mode;          // mode of the most recent open request

public:
    /// Creates a wrapper with no path and no open file.
    ModFStream() : stream(), mode() {}

    /// Opens FilePath with the given mode and remembers both for later reopening.
    ModFStream(const std::string &FilePath, std::ios_base::openmode mode) : filepath(FilePath), stream(FilePath, mode), mode(mode) {}

    /// Direct access to the wrapped stream (state flags, seeking, ...).
    inline std::basic_fstream<T, Traits>& get() {return stream;}

    /// Mode passed to the constructor or to the latest setmode() call.
    std::ios_base::openmode getmode() const {return mode;}

    /// Closes the stream and reopens the remembered path with the new mode.
    /// Failure is reported through the wrapped stream's state (see get()).
    void setmode(std::ios::openmode mode)
    {
        // The parameter shadows the member; store it explicitly so the recorded
        // mode does not go stale after a reopen.
        this->mode = mode;
        stream.close();
        stream.open(filepath, mode);
    }

    /// Forwards formatted output to the wrapped stream.
    template<typename U>
    ModFStream& operator << (const U& other)
    {
        stream << other;
        return *this;
    }

    /// Forwards formatted input to the wrapped stream.
    template<typename U>
    ModFStream& operator >> (U& other)
    {
        stream >> other;
        return *this;
    }
};
/// Demo: writes a BOM followed by a wide string through ModFStream, then reads
/// both back, switching the open mode between the individual steps.
int main()
{
    wchar_t bom[] = L"\xFF\xFE";
    std::wstring str = L"Chào";
    ModFStream<wchar_t> stream("C:/Users/Brandon/Desktop/UTF16Test.txt", std::ios::out | std::ios::binary);
    stream << bom;
    // Reopen for appending: plain std::ios::out truncates the file on open and
    // would erase the BOM that was just written.
    stream.setmode(std::ios::out | std::ios::app | std::ios::binary);
    stream << str;
    str.clear();
    stream.setmode(std::ios::in | std::ios::binary);
    stream >> bom[0] >> bom[1];
    stream.setmode(std::ios::in);
    // Every reopen starts again at the beginning of the file, so skip the two
    // BOM characters before extracting the text.
    stream.get().ignore(2);
    stream >> str;
    std::wcout<<str;
}
我猜你可以写一个WinAPI fstream模拟器..
#include <cstdlib>
#include <iostream>
#include <locale>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>
#include <windows.h>
namespace win
{
/// Compile-time predicate: `value` is true only when T is exactly wchar_t,
/// and false for every other type.
template<typename T>
struct is_wide_char : std::is_same<T, wchar_t> {};
/// File open flags. Each enumerator occupies its own bit so that several
/// flags can be combined into one value.
enum class open_mode
{
    app   = 0x01,
    ate   = 0x02,
    bin   = 0x04,
    in    = 0x08,
    out   = 0x10,
    trunc = 0x20
};
/// Reference point for a seek: beginning of the file, current position, or
/// end of the file. Each enumerator occupies its own bit.
enum class seek_dir
{
    beg = 0x1,
    cur = 0x2,
    end = 0x4
};
/// Flags present in both a and b.
inline constexpr open_mode operator & (open_mode a, open_mode b)
{
    return static_cast<open_mode>(static_cast<int>(a) & static_cast<int>(b));
}

/// Flags present in a, in b, or in both.
inline constexpr open_mode operator | (open_mode a, open_mode b)
{
    return static_cast<open_mode>(static_cast<int>(a) | static_cast<int>(b));
}

/// Flags present in exactly one of a and b.
inline constexpr open_mode operator ^ (open_mode a, open_mode b)
{
    return static_cast<open_mode>(static_cast<int>(a) ^ static_cast<int>(b));
}

/// Bitwise complement of a.
inline constexpr open_mode operator~(open_mode a)
{
    return static_cast<open_mode>(~static_cast<int>(a));
}

/// Adds the flags of b to a and returns a.
inline const open_mode& operator |= (open_mode& a, open_mode b)
{
    a = a | b;
    return a;
}

/// Keeps only the flags of a that are also set in b and returns a.
inline const open_mode& operator &= (open_mode& a, open_mode b)
{
    a = a & b;
    return a;
}

/// Toggles in a every flag that is set in b and returns a.
inline const open_mode& operator ^= (open_mode& a, open_mode b)
{
    a = a ^ b;
    return a;
}
/// Helper for wide input: the text already consists of wchar_t, so copy it.
template<typename T>
std::wstring to_wide_string_impl(const T* str, std::true_type)
{
    return std::wstring(str);
}

/// Helper for every other character type: the bytes are treated as a
/// multibyte string in the current C locale and converted with std::mbstowcs.
template<typename T>
std::wstring to_wide_string_impl(const T* str, std::false_type)
{
    const char* narrow = reinterpret_cast<const char*>(str);
    // First pass with a null destination only measures the converted length.
    const std::size_t length = std::mbstowcs(nullptr, narrow, 0);
    if (length == static_cast<std::size_t>(-1))
        throw std::range_error("to_wide_string: invalid multibyte sequence");
    std::wstring utf16(length, L'\0');
    std::mbstowcs(&utf16[0], narrow, length);
    return utf16;
}

/// Converts a null-terminated string of character type T to std::wstring.
///
/// The branch is chosen at compile time by tag dispatch, so only the code that
/// is valid for T gets instantiated (a runtime `if` would force
/// std::wstring(const char*) to compile for narrow input).
///
/// @param str  null-terminated input string; must not be null
/// @return     the wide copy/conversion of str
/// @throws std::range_error if str is not a valid multibyte sequence
template<typename T>
std::wstring to_wide_string(const T* str)
{
    return to_wide_string_impl(str, std::is_same<T, wchar_t>());
}
template<typename T>
class WinFStream
{
private:
open_mode mode;
HANDLE hFile;
bool binary_mode = false;
public:
WinFStream(const T* FilePath, open_mode mode = open_mode::in | open_mode::out) : mode(mode), hFile(nullptr), binary_mode(false)
{
unsigned int open_flags = 0;
if (static_cast<int>(mode & open_mode::bin))
{
binary_mode = true;
}
if (static_cast<int>(mode & open_mode::in))
{
open_flags |= GENERIC_READ;
}
else if (static_cast<int>(mode & open_mode::app))
{
open_flags |= FILE_APPEND_DATA;
}
if (static_cast<int>(mode & open_mode::out))
{
open_flags |= GENERIC_WRITE;
}
std::wstring path = to_wide_string(FilePath);
hFile = CreateFileW(path.c_str(), open_flags, 0, nullptr, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);
if (static_cast<int>(mode & open_mode::ate))
{
SetFilePointer(hFile, 0, nullptr, FILE_END);
}
}
~WinFStream() {CloseHandle(hFile); hFile = nullptr;}
inline std::size_t seekg(std::size_t pos, seek_dir from)
{
return SetFilePointer(hFile, pos, nullptr, static_cast<int>(from) - 1);
}
inline std::size_t tellg()
{
return GetFileSize(hFile, nullptr);
}
void close()
{
CloseHandle(hFile);
hFile = nullptr;
}
template<typename U>
inline std::size_t write(const U* str, std::size_t size)
{
long unsigned int bytes_written = 0;
WriteFile(hFile, &str[0], size * sizeof(U), &bytes_written, nullptr);
return bytes_written;
}
template<typename U>
inline std::size_t read(U* str, std::size_t size)
{
long unsigned int bytes_read = 0;
ReadFile(hFile, &str[0], size * sizeof(U), &bytes_read, nullptr);
return bytes_read;
}
template<typename U>
WinFStream& operator << (const U &other)
{
this->write(&other, 1);
return *this;
}
template<typename U, std::size_t size>
WinFStream& operator << (U (&str)[size])
{
this->write(&str[0], size);
return *this;
}
template<typename U, typename Traits = std::char_traits<U>>
WinFStream& operator << (const std::basic_string<U, Traits>& str)
{
this->write(str.c_str(), str.size());
return *this;
}
template<typename U>
WinFStream& operator >> (U &other)
{
this->read(&other, 1);
return *this;
}
template<typename U, std::size_t size>
WinFStream& operator >> (U (&str)[size])
{
this->read(&str[0], size);
return *this;
}
template<typename U, typename Traits = std::char_traits<U>>
WinFStream& operator >> (std::basic_string<U, Traits>& str)
{
unsigned int i = 0;
std::vector<U> buffer(512, 0);
while(true)
{
long unsigned int bytes_read = 0;
bool result = ReadFile(hFile, &buffer[i], sizeof(U), &bytes_read, nullptr);
if (std::isspace(buffer[i]) || buffer[i] == '\r' || buffer[i] == '\n')
break;
++i;
if (bytes_read != sizeof(U) || !result)
break;
}
str.append(buffer.begin(), buffer.begin() + i);
return *this;
}
};
// Convenience aliases: stream taking a wide-character path / a narrow path.
using WinFStreamW = WinFStream<wchar_t>;
using WinFStreamA = WinFStream<char>;
}
using namespace win;
int main()
{
unsigned char bom[2] = {0XFF, 0xFE};
std::wstring str = L"Chào";
WinFStreamW File(L"C:/Users/Brandon/Desktop/UTF16Test.txt");
File << bom;
File << str;
File.seekg(0, win::seek_dir::beg);
std::wstring str2;
File>>bom;
File>>str2;
std::wcout<<str2;
}
我知道,这个实现比较粗糙,而且与fstream的工作方式并不完全相同,但它值得我花时间去“尝试”模拟..
不过,我的运算符<<和>>并不“等同于”std::fstream的对应运算符..
在以二进制模式写入bom之后,您可能最好只使用CreateFileW, ReadFile, WriteFile
或以文本模式重新打开文件。