Question

考虑以下程序：

#include <iostream>
#include <sstream>
#include <string>

// Inserts three UTF-16 code units into a char16_t string stream with put()
// and, after each insertion, prints the value of the stream's bad() flag.
int main(int, char **) {
  std::basic_stringstream<char16_t> stream;

  // Insert a single code unit, then report whether badbit is set.
  const auto put_and_report = [&stream](char16_t unit) {
    stream.put(unit);
    std::cout << " Bad: " << stream.bad() << std::endl;
  };

  put_and_report(u'\u0100');
  put_and_report(u'\uFFFE');
  put_and_report(u'\uFFFF');

  return 0;
}

输出结果为：

 Bad: 0                                                                                                                                                                                
 Bad: 0                                                                                                                                                                                
 Bad: 1

badbit被设置的原因似乎是：如果字符等于std::char_traits::eof()，put就会设置badbit。此后我就无法再向流中插入任何内容了。

在http://en.cppreference.com/w/cpp/string/char_traits，它声明：

int_type：一个整数类型，可以容纳char_type的所有值以及EOF

但是如果char_type与int_type（uint_least16_t）相同，那么这怎么可能呢？

Answer 1

标准非常明确，std::char_traits<char16_t>::int_type是std::uint_least16_t的typedef，请参阅[char.traits.specializations.char16_t]，其中也说：

成员eof()将返回一个实现定义的常量，该常量不能作为有效的UTF-16代码单元出现。

我不确定它与 http://www.unicode.org/versions/corrigendum9.html 究竟如何相互影响，但主流C++实现中的现有做法是使用全1位模式作为char_traits<char16_t>::eof()的值，即使uint_least16_t正好是16位。

经过一番思考后，我认为实现可以通过让std::char_traits<char16_t>::to_int_type(char_type)在给定U+FFFF时返回U+FFFD来满足字符特性（Character traits）要求。这符合对eof()返回值的要求：

一个值e，使得X::eq_int_type(e,X::to_int_type(c))对所有值c都为false。

这还可以确保在检查basic_streambuf<char16_t>::sputc(u'\uFFFF')的结果时可以区分成功和失败，以便它仅在失败时返回eof()，否则返回u'\ufffd'。

（注：原文此处混入了一段与本回答无关的pthread示例代码，原样保留如下。）

#define _GNU_SOURCE #include <iostream> #include <pthread.h> #include <unistd.h> #include <mutex> #include <condition_variable> #include <semaphore.h> #include <sched.h> #include <stdio.h> #include <assert.h> #include <stdlib.h> using namespace std; sem_t semaphore; sem_t mutex1; sem_t mutex2; sem_t mutex3; sem_t mutex4; // initialze variables int i = 0; int overrun1 = 0; int overrun2 = 0; int overrun3 = 0; int overrun4 = 0; int doWork(); void* p1(void *param); void* p2(void *param); void* p3(void *param); void* p4(void *param); int main(int argc, char const *argv[]) { cpu_set_t cpus; CPU_ZERO(&cpus); CPU_SET(1, &cpus); sem_init(&mutex1, 0, 0); sem_init(&mutex2, 0, 0); sem_init(&mutex3, 0, 0); sem_init(&mutex4, 0, 0); // initialze all threads pthread_t thread1; pthread_t thread2; pthread_t thread3; pthread_t thread4; // actually create all threads pthread_create(&thread1, NULL, p1, NULL); pthread_create(&thread2, NULL, p2, NULL); pthread_create(&thread3, NULL, p3, NULL); pthread_create(&thread4, NULL, p4, NULL); while (i < 160) { if (i == 0) // initial case. 
at time 0 schedule all threads { sem_post(&mutex1); sem_post(&mutex2); sem_post(&mutex3); sem_post(&mutex4); } else if (i % 16 == 0) // last case which happens every 16 units which schedules all threads again { sem_post(&mutex1); sem_post(&mutex2); sem_post(&mutex3); sem_post(&mutex4); } else if (i % 4 == 0) // case that happens every 4 units of time { sem_post(&mutex1); sem_post(&mutex2); sem_post(&mutex3); } else if (i % 2 == 0) // case that happens every other unit of time { sem_post(&mutex1); sem_post(&mutex2); } else if (i % 1 == 0) // case that happens every unit of time { sem_post(&mutex1); } i++; // increment i to go through the loop again } // join all threads back to the main one pthread_join(thread1, NULL); pthread_join(thread2, NULL); pthread_join(thread3, NULL); pthread_join(thread4, NULL); return 0; } // doWork function int doWork() { int lousyArray[10][10]; int product = 1; for (int i = 0; i < 10; i++) { for (int j = 0; j < 10; j++) { lousyArray[i][j] = 1; } } for (int k = 0; k < 1; k++) { for (int j = 0; j < 10; j++) { for (int i = 0; i < 10; i++) { product *= lousyArray[i][j]; } } } return 1; } void* p1(void *param) { bool thread1FinishFlag = false; while(1) { sem_wait(&mutex1); thread1FinishFlag = false; for (int i = 0; i < 1; i++) { doWork(); } //increment counter here thread1FinishFlag = true; } } void* p2(void *param) { bool thread2FinishFlag = false; while(1) { sem_wait(&mutex2); thread2FinishFlag = false; for (int i = 0; i < 2; i++) { doWork(); } //increment counter here thread2FinishFlag = true; } } void* p3(void *param) { bool thread3FinishFlag = false; while(1) { sem_wait(&mutex3); thread3FinishFlag = false; for (int i = 0; i < 4; i++) { doWork(); } //increment counter here thread3FinishFlag = true; } } void* p4(void *param) { bool thread4FinishFlag = false; while(1) { sem_wait(&mutex4); thread4FinishFlag = false; for (int i = 0; i < 16; i++) { doWork(); } //increment counter here thread4FinishFlag = true; } } void nsleep() { struct 
timespec delay; delay.tv_sec = 0; delay.tv_sec = 100000000L; nanosleep(&delay, NULL); }

我会试试。我已创建https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80624以在GCC中跟踪此内容。

我还针对标准报告了一个issue，这样我们就可以修正“不能作为有效的UTF-16代码单元出现”这一措辞，或者也可以通过其他方式修复它。

Answer 2

这真的取决于你所说的“足够大”是什么意思。char16_t并不是一个“足以容纳任何Unicode字符（包括那些不允许我使用的字符）”的类型。您选择尝试将\uFFFF（它是“保留供内部使用”的）填入char16_t，因此有过错的是您自己。该程序只是按照您的指示执行。

Answer 3

行为很有意思，那就是：

stream.put(u'\uFFFF');

设置badbit，而：

stream << u'\uFFFF';
char16_t c = u'\uFFFF'; stream.write( &c, 1 );

未设置badbit。

这个答案只关注差异。

让我们在bits/ostream.tcc第164~165行检查gcc的源代码，我们可以看到put()检查值是否等于eof()，并设置badbit

if (traits_type::eq_int_type(__put, traits_type::eof()))  // <== It checks the value!
    __err |= ios_base::badbit;

从第196行开始，我们可以看到write()没有这个逻辑，只检查是否所有字符都写入缓冲区。

这解释了这种行为。

来自std::basic_ostream::put的参考文档：

在内部，该函数首先构造一个sentry对象来访问输出序列。然后（如果状态为good），它将c插入其关联的流缓冲区对象，就像调用其成员函数sputc一样，最后在返回之前销毁该sentry对象。

它没有说明eof()的检查。

所以我认为这可能是文档中的错误或实现中的错误。

char_traits <char16_t> :: int_type的大小不够大吗？

3 个答案: