我遇到困难的情况,有点复杂, 请注意我的描述,谢谢!
我开发了一款名为 doudizhu(斗地主)的在线游戏服务器,大多数中国人都知道这个游戏。问题是:一些资源消耗很小的函数却要花费大约十毫秒,有时甚至超过一百毫秒。这些函数没有操作文件、套接字或数据库,只做一些简单的计算。
测量方法是:我写了一个名为 TimeElapseMeasure 的类,用它记录构造和析构之间经过的时间。我使用的是 ACE_High_Res_Timer:在构造函数中启动计时器,在析构函数中停止计时器,然后用 elapsed_microseconds 方法取得经过的微秒数,再除以 1000 就得到我想要的毫秒数。源代码、几个资源消耗很小的函数以及 gdb 回溯,我会在下面贴出。
通常,这个游戏服务器看起来像这样:
我花了很多时间查找这些时间到底消耗在哪里,但没有任何结果。在 gdb 中查看所有线程的回溯,其他线程要么在睡眠,要么只是在条件变量上等待。
另一种测量方法是:记录刚收到(recv)请求包的时间,以及刷出(发送)响应包的时间。令人惊讶的是,两者之间有时会相差好几秒——是的,单位是秒。这些时间究竟花在哪里了?
请帮帮我。
下面是耗时的函数。mea2 是在 writev 函数返回之后才开始计时的,但它仍然会因为耗时过长而触发 assert,这让我很困惑。我已经验证过,每次调用 Svc_Send_Handler::write 时,writev 只会被调用一次。
int Svc_Send_Handler::write(void)
{
#ifndef NDEBUG
TimeElapseMeasure mea( "Svc_Send_Handler::write" );
int count = 0;
#endif
while (1)
{
size_t buf_size = buf_.size();
if ( buf_size == 0 )
{
break;
}
int iovcnt = buf_size;
struct iovec iov[iovcnt];
int sum_bytes = 0;
Buffer_List currSendList;
#ifndef NDEBUG
if( count > 1 )
{
assert(false);
}
++count;
std::vector<uint32_t> sendMsgId;
#endif
for( int i = 0;i != iovcnt;++i )
{
Block_Buffer *send_buf = buf_.front();
buf_.pop_front();
if( ! send_buf )
{
assert(false);
continue;
}
currSendList.push_back( send_buf );
iov[i].iov_base = send_buf->get_read_ptr();
iov[i].iov_len = send_buf->readable_bytes();
sum_bytes += send_buf->readable_bytes();
#ifndef NDEBUG
size_t rd_idx_org = send_buf->get_read_idx();
uint32_t len = 0;
uint32_t msgId = 0;
(*send_buf) >> len >> msgId;
sendMsgId.push_back( msgId );
send_buf->set_read_idx( rd_idx_org );
ProcessDelayMeasure::instance()->secTillNow( msgId );
#endif
}
TimeElapseMeasure mea1( "Svc_Send_Handler::write bf writev" );
int ret = ::writev(this->get_fd(), iov, iovcnt);
TimeElapseMeasure mea2( "Svc_Send_Handler::write af writev" );
if (ret == -1)
{
ACE_DEBUG ((LM_DEBUG,
ACE_TEXT ("\n\n\n writev -1,errno:%d\n\n\n"),
errno
));
perror("writev");
if (errno == EINTR)
{
continue;
}
else if (errno == EWOULDBLOCK)
{
return ret;
}
else
{
for( Buffer_List::iterator it = currSendList.begin();
it != currSendList.end(); ++it )
{
sender_->push_block( *it );
}
for (Buffer_List::iterator it = buf_.begin(); it != buf_.end(); ++it)
{
sender_->push_block(*it);
}
buf_.clear();
handle_close();
return 0;
}
}
else if (ret == sum_bytes)
{
#ifndef NDEBUG
for( int i = 0; i != sendMsgId.size(); ++i )
{
int msg_id = sendMsgId.at( i );
ProcessDelayMeasure::instance()->endProcessMsg( msg_id );
}
#endif
for( int i = 0; i != iovcnt; ++i )
{
sender_->push_block( currSendList.front() );
currSendList.pop_front();
}
continue;
}
else
{
assert(false);
size_t writed_bytes = ret, remove_count = 0;
for (Buffer_List::iterator it = buf_.begin(); it != buf_.end(); ++it)
{
if (writed_bytes >= (*it)->readable_bytes())
{
++remove_count;
writed_bytes -= (*it)->readable_bytes();
sender_->push_block(*it);
} else
{
(*it)->set_read_idx((*it)->get_read_idx() + writed_bytes);
break;
}
}
std::cout << "remove_count = " << remove_count << std::endl;
for (size_t i = 0; i < remove_count; ++i)
buf_.pop_front();
return ret;
}
}
return 0;
}
Here is the measurement class; only the key methods are shown:
/**
 * Stores the label for this measurement and starts the timer.
 *
 * @param name Label printed by the destructor when it reports the elapsed time.
 */
TimeElapseMeasure::TimeElapseMeasure( const std::string & name )
    : _name( name )
{
    // _timer is an ACE_High_Res_Timer; it is stopped again in the destructor.
    _timer.start();
}
/**
 * Stops the timer and reports how long this object was alive.
 *
 * The elapsed time is truncated to whole milliseconds. A log line is written
 * when that value is greater than 1, and the process asserts when it is
 * greater than 20.
 */
TimeElapseMeasure::~TimeElapseMeasure()
{
    _timer.stop ();

    // Only the microsecond reading is used; the unused nanosecond query was dropped.
    ACE_hrtime_t us = 0;
    _timer.elapsed_microseconds(us);
    const ACE_hrtime_t ms = us / 1000;   // integer division: sub-millisecond part is discarded

    if( ms > 1 )
    {
        ACE_DEBUG ((LM_DEBUG,
                    ACE_TEXT ("%t [%s] cost %Q us %Q ms\n"),
                    _name.c_str(),
                    us,
                    ms
                    ));
    }
    if( ms > 20 )
    {
        // Debug probe: abort so the slow scope can be inspected in a debugger.
        assert(false);
    }
}
void ProcessDelayMeasure::beginRecvMsg( int type )
{
#ifndef NDEBUG
ACE_High_Res_Timer timer;timer.start();
_recvMsgDelay[type] = timer;
ACE_Time_Value atv;
ACE_Time_Value aa = atv.now();
ACE_UINT64 mills = aa.get_msec();
_recvSec[type] = mills;
ACE_DEBUG ((LM_DEBUG,
ACE_TEXT ("--------> beginRecvMsg msg:%d\n"),
type
));
#endif
}
void ProcessDelayMeasure::secTillNow( int type,bool bResMsgType )
{
#ifndef NDEBUG
int sendType = 0;
if( bResMsgType )
{
sendType = type - 400000; //4000000 is the diff between the request type from the client and response type from the server
}
else
{
sendType = type;
}
std::map<int,ACE_UINT64>::const_iterator ciFind = _recvSec.find( sendType );
if( _recvSec.end() != ciFind )
{
ACE_Time_Value atv;
ACE_Time_Value aa = atv.now();
ACE_UINT64 now = aa.get_msec();
ACE_UINT64 last = now - ciFind->second;
ACE_DEBUG ((LM_DEBUG,
ACE_TEXT ("--------> secTillNow msg:%s cost: %Q \n"),
CUtil::msgId2Name( type ).c_str(),
last));
//RES_SELECT_ROOM this would sleep some seconds,because of the business need
//Though I increase the last variable more that 150 ms,
//it still aborts at some time,I will cry out!
if( last > 150 && RES_SELECT_ROOM != type )
{
assert(false);
}
}
#endif
}
void ProcessDelayMeasure::endProcessMsg( int type )
{
#ifndef NDEBUG
this->secTillNow( type );
int sendType = type - 400000;
std::map<int,ACE_High_Res_Timer>::iterator ci = _recvMsgDelay.find( sendType );
if( _recvMsgDelay.end() != ci )
{
ci->second.stop();
ACE_hrtime_t us;
ci->second.elapsed_microseconds( us );
ACE_hrtime_t ms = us / 1000;
if( ms > 1 )
{
ACE_DEBUG ((LM_DEBUG,
ACE_TEXT ("--------> endProcessMsg msg:%s cost: %Q ms\n"),
CUtil::msgId2Name( type ).c_str(),
ms));
}
//RES_SELECT_ROOM this would sleep some seconds,because of the business need
//Though I increase the last variable more that 150 ms,
//it still aborts at some time,I will cry out!
if( ms > 150 && RES_SELECT_ROOM != type )
{
assert(false);
}
_recvMsgDelay.erase( ci );
}
#endif
}
因为 gdb 的 `thread apply all bt` 输出太大,这里先不贴;如果需要,我稍后再发布。再次感谢!
google gprof结果:
Total: 589 samples
488 82.9% 82.9% 498 84.6% __nanosleep_nocancel
18 3.1% 85.9% 18 3.1% __pthread_mutex_lock
12 2.0% 87.9% 50 8.5% List::empty
12 2.0% 90.0% 12 2.0% pthread_mutex_unlock
8 1.4% 91.3% 8 1.4% __pthread_disable_asynccancel
7 1.2% 92.5% 26 4.4% Mutex_Guard::Mutex_Guard
7 1.2% 93.7% 7 1.2% std::list::empty
4 0.7% 94.4% 4 0.7% __read_nocancel
4 0.7% 95.1% 4 0.7% nanosleep
3 0.5% 95.6% 4 0.7% __pthread_enable_asynccancel
3 0.5% 96.1% 3 0.5% pthread_sigmask
2 0.3% 96.4% 274 46.5% Logic_Manager::process_list
2 0.3% 96.8% 3 0.5% Time_Value::operator timespec
2 0.3% 97.1% 2 0.3% Time_Value::sec
2 0.3% 97.5% 507 86.1% Time_Value::sleep
1 0.2% 97.6% 1 0.2% ACE_Time_Value::ACE_Time_Value
1 0.2% 97.8% 1 0.2% Logic_Manager::isRunning
1 0.2% 98.0% 1 0.2% Logic_Server::instance
1 0.2% 98.1% 19 3.2% Mutex_Guard::acquire
1 0.2% 98.3% 14 2.4% Mutex_Guard::release
1 0.2% 98.5% 27 4.6% Stream_Packer::process_drop
1 0.2% 98.6% 11 1.9% Stream_Packer::split_block_process
1 0.2% 98.8% 10 1.7% Thread_Mutex::release
1 0.2% 99.0% 1 0.2% Time_Value::set
1 0.2% 99.2% 1 0.2% _Hashtable_iterator
1 0.2% 99.3% 1 0.2% __connect_nocancel
1 0.2% 99.5% 1 0.2% cond_signal (inline)
1 0.2% 99.7% 1 0.2% std::_Rb_tree::_S_key
1 0.2% 99.8% 1 0.2% writev
1 0.2% 100.0% 15 2.5% ~Mutex_Guard
0 0.0% 100.0% 1 0.2% 0x00007fffdb5fd8ef
0 0.0% 100.0% 1 0.2% 0x00007ffff372e19f
0 0.0% 100.0% 1 0.2% 0x00007ffff372e1df
0 0.0% 100.0% 1 0.2% 0x00007ffff372e99f
0 0.0% 100.0% 1 0.2% 0x00007ffff412f99f
0 0.0% 100.0% 6 1.0% 0x00007ffff4b30c9f
0 0.0% 100.0% 2 0.3% 0x00007ffff4b30ccf
0 0.0% 100.0% 8 1.4% 0x00007ffff4b30d0f
0 0.0% 100.0% 1 0.2% ACE_Condition::signal
0 0.0% 100.0% 3 0.5% ACE_Log_Msg::log@e6e00
0 0.0% 100.0% 3 0.5% ACE_Log_Msg::log@e7410
0 0.0% 100.0% 3 0.5% ACE_Log_Msg::log@e8b90
0 0.0% 100.0% 2 0.3% ACE_Task_Base::svc_run
0 0.0% 100.0% 2 0.3% ACE_Thread_Adapter::invoke
0 0.0% 100.0% 2 0.3% ACE_Thread_Adapter::invoke_i
0 0.0% 100.0% 7 1.2% Epoll_Watcher::loop
0 0.0% 100.0% 3 0.5% Epoll_Watcher::process_timer_event
0 0.0% 100.0% 2 0.3% Epoll_Watcher::watcher_loop
0 0.0% 100.0% 2 0.3% Logic_Manager::process_100001
0 0.0% 100.0% 1 0.2% Logic_Manager::process_block
0 0.0% 100.0% 275 46.7% Logic_Manager::run
0 0.0% 100.0% 1 0.2% Logic_Player::respond_error_result
0 0.0% 100.0% 1 0.2% Logic_Player::respond_finer_result
0 0.0% 100.0% 1 0.2% Logic_Sender::find_client
0 0.0% 100.0% 2 0.3% LoginSvc::svc
0 0.0% 100.0% 1 0.2% MySQLManager::MySQLManager
0 0.0% 100.0% 1 0.2% MySQLManager::initConnection
0 0.0% 100.0% 1 0.2% MysqlPool::MysqlPool
0 0.0% 100.0% 1 0.2% MysqlPool::init
0 0.0% 100.0% 1 0.2% MysqlPool::instance
0 0.0% 100.0% 1 0.2% Object_Pool::pop
0 0.0% 100.0% 2 0.3% ProcessDelayMeasure::beginRecvMsg
0 0.0% 100.0% 1 0.2% ProcessDelayMeasure::secTillNow
0 0.0% 100.0% 2 0.3% Receiver::run
0 0.0% 100.0% 1 0.2% Sender::append_send_block
0 0.0% 100.0% 1 0.2% Sender::process_drop
0 0.0% 100.0% 5 0.8% Sender::run
0 0.0% 100.0% 2 0.3% Sender_Watcher::inner_cmd
0 0.0% 100.0% 297 50.4% Stream_Packer::process_list
0 0.0% 100.0% 299 50.8% Stream_Packer::run
0 0.0% 100.0% 2 0.3% Svc_Recv_Handler::handle_input
0 0.0% 100.0% 3 0.5% Svc_Send_Handler::handle_timeout
0 0.0% 100.0% 3 0.5% Svc_Send_Handler::write
0 0.0% 100.0% 581 98.6% Thread::thr_func
0 0.0% 100.0% 12 2.0% Thread_Mutex::acquire
0 0.0% 100.0% 1 0.2% Time_Value::Time_Value
0 0.0% 100.0% 4 0.7% _IO_default_uflow_internal
0 0.0% 100.0% 4 0.7% _IO_file_underflow@@GLIBC_2.2.5
0 0.0% 100.0% 583 99.0% __clone
0 0.0% 100.0% 4 0.7% __libc_start_main
0 0.0% 100.0% 1 0.2% __msync_nocancel
0 0.0% 100.0% 7 1.2% __pthread_mutex_lock_full
0 0.0% 100.0% 3 0.5% __pthread_mutex_unlock_usercnt
0 0.0% 100.0% 4 0.7% _start
0 0.0% 100.0% 4 0.7% getchar
0 0.0% 100.0% 4 0.7% main
0 0.0% 100.0% 1 0.2% my_connect
0 0.0% 100.0% 1 0.2% mysql_real_connect
0 0.0% 100.0% 583 99.0% start_thread
0 0.0% 100.0% 1 0.2% std::_Hashtable::end
0 0.0% 100.0% 1 0.2% std::_Rb_tree::find
0 0.0% 100.0% 1 0.2% std::map::find
0 0.0% 100.0% 3 0.5% thr_sigsetmask (inline)
0 0.0% 100.0% 3 0.5% ~ACE_Log_Msg_Sig_Guard (inline)
答案 0 :(得分:0)
我不确定,但它闻起来像某种同步问题。
我的经验中的一些注释:
您使用的这种多线程模型,无法让您充分利用全部可用的处理能力来获得最佳性能。首先,因为您使用的线程数量是固定的,所以无法随负载向上或向下扩展。其次,因为线程同步会显著降低性能。到目前为止,我测试过并推荐的最佳方案,是使用基于 reactor 或 proactor 模式的异步套接字。您可以在 boost::asio 库的文档中找到一些很好的解释。为了提高代码的可读性和性能,您还可以尝试众多协程实现中的一种(据我所知,boost::asio 现在允许您配合使用 boost::coroutine)。
PS。如果您可以粘贴一些测量日志,这有助于找到代码中的弱点。