Writing a C program to measure the time spent on a context switch in the Linux OS

Date: 2010-03-03 02:34:45

Tags: c linux context-switch

Can we write a C program to find out the time spent on a context switch in Linux? Could you share the code if you have it? Thanks.

6 Answers:

Answer 0: (score: 25)

Profiling switching time is very difficult, but the kernel latency tracing tools, as well as oprofile (which can profile the kernel itself), will help you.

To benchmark interactive application performance, I wrote a small tool called latencybench that measures unexpected latency spikes:

// Compile with g++ latencybench.cc -o latencybench -lboost_thread-mt
// Should also work on MSVC and other platforms supported by Boost.

#include <boost/date_time.hpp>
#include <boost/format.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/thread/thread.hpp>
#include <algorithm>
#include <csignal>
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>

volatile bool m_quit = false;

extern "C" void sighandler(int) {
    m_quit = true;
}

std::string num(unsigned val) {
    if (val == 1) return "one occurrence";
    return boost::lexical_cast<std::string>(val) + " occurrences";
}

int main(int argc, char** argv) {
    using namespace boost::posix_time;
    std::signal(SIGINT, sighandler);
    std::signal(SIGTERM, sighandler);
    time_duration duration = milliseconds(10);
    if (argc > 1) {
        try {
            if (argc != 2) throw 1;
            unsigned ms = boost::lexical_cast<unsigned>(argv[1]);
            if (ms > 1000) throw 2;
            duration = milliseconds(ms);
        } catch (...) {
            std::cerr << "Usage: " << argv[0] << " milliseconds" << std::endl;
            return EXIT_FAILURE;
        }
    }
    typedef std::map<long, unsigned> Durations;
    Durations durations;
    unsigned samples = 0, wrongsamples = 0;
    unsigned max = 0;
    long last = -1;
    std::cout << "Measuring actual sleep delays when requesting " << duration.total_milliseconds() << " ms: (Ctrl+C when done)" << std::endl;
    ptime begin = boost::get_system_time();
    while (!m_quit) {
        ptime start = boost::get_system_time();
        boost::this_thread::sleep(start + duration);
        long actual = (boost::get_system_time() - start).total_milliseconds();
        ++samples;
        unsigned num = ++durations[actual];
        if (actual != last) {
            std::cout << "\r  " << actual << " ms " << std::flush;
            last = actual;
        }
        if (actual != duration.total_milliseconds()) {
            ++wrongsamples;
            if (num > max) max = num;
            std::cout << "spike at " << start - begin << std::endl;
            last = -1;
        }
    }
    if (samples == 0) return 0;
    std::cout << "\rTotal measurement duration:  " << boost::get_system_time() - begin << "\n";
    std::cout << "Number of samples collected: " << samples << "\n";
    std::cout << "Incorrect delay count:       " << wrongsamples << boost::format(" (%.2f %%)") % (100.0 * wrongsamples / samples) << "\n\n";
    std::cout << "Histogram of actual delays:\n\n";
    unsigned correctsamples = samples - wrongsamples;
    const unsigned line = 60;
    double scale = 1.0;
    char ch = '+';
    if (max > line) {
        scale = double(line) / max;
        ch = '*';
    }
    double correctscale = 1.0;
    if (correctsamples > line) correctscale = double(line) / correctsamples;
    for (Durations::const_iterator it = durations.begin(); it != durations.end(); ++it) {
        std::string bar;
        if (it->first == duration.total_milliseconds()) bar = std::string(correctscale * it->second, '>');
        else bar = std::string(scale * it->second, ch);
        std::cout << boost::format("%5d ms | %s %d") % it->first % bar % it->second << std::endl;
    }
    std::cout << "\n";
    std::string indent(30, ' ');
    std::cout << indent << "+-- Legend ----------------------------------\n";
    std::cout << indent << "|  >  " << num(1.0 / correctscale) << " (of " << duration.total_milliseconds() << " ms delay)\n";
    if (wrongsamples > 0) std::cout << indent << "|  " << ch << "  " << num(1.0 / scale) << " (of any other delay)\n";
}

Results on an Ubuntu 2.6.32-14-generic kernel. While measuring, I was compiling C++ code on four cores and playing a game with OpenGL graphics at the same time (to make it more interesting):

Total measurement duration:  00:01:45.191465
Number of samples collected: 10383
Incorrect delay count:       196 (1.89 %)

Histogram of actual delays:

   10 ms | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 10187
   11 ms | *************************************************** 70
   12 ms | ************************************************************ 82
   13 ms | ********* 13
   14 ms | ********* 13
   15 ms | ** 4
   17 ms | *** 5
   18 ms | * 2
   19 ms | **** 6
   20 ms |  1

                              +-- Legend ----------------------------------
                              |  >  169 occurrences (of 10 ms delay)
                              |  *  one occurrence (of any other delay)

With an rt-patched kernel I got much better results, everything within 10-12 ms.

The legend in the printout seems to be suffering from a rounding error or something (and the pasted source code is not exactly the same version). I never quite polished this application for a release...

Answer 1: (score: 8)

If you have superuser privileges, you can run a SystemTap program with a context-switch probe point and print the current time at each switch:

probe scheduler.ctxswitch {
    printf("Switch from %d to %d at %d\n", prev_pid, next_pid, gettimeofday_us())
}

I'm not sure how reliable the output data is, but it's a quick and easy way to get some numbers.

Answer 2: (score: 5)

What are you thinking, measuring the context switch in seconds, milliseconds, or even microseconds? It all happens in less time than that. If you really want to spend a measurable amount of time in a context switch... well, try some real-mode kernel-style code written in assembly, and you might see something.

Answer 3: (score: 4)

Short answer - no. Long answer follows.

Context switches roughly happen when:

  1. A user process enters the kernel through a system call or a trap (e.g. a page fault) and the requested data (e.g. file contents) is not yet available, so the kernel puts said user process to sleep and switches to another runnable process.
  2. The kernel detects that a given user process has consumed its full time quantum (this happens in code invoked from the timer interrupt).
  3. Data becomes available for a higher-priority process that is currently sleeping (this happens in code invoked from/around IO interrupts).

The switch itself is one-way, so the best we can do in userland (I assume that is what you are asking) is to measure a sort of RTT, from our process to another and back. The other process also takes time to do its work. We can of course make two or more processes cooperate on this, but the thing is that the kernel does not guarantee that one of our processes will be picked next. It is probably possible to predictably switch to a given process with the RT scheduler, but I have no advice here; suggestions are welcome.

Answer 4: (score: 2)

Measuring the cost of a context switch is a little trickier. We can compute the time spent on context switches by running two processes on a single CPU and setting up three Linux pipes between them:

  • two pipes for sharing strings between the processes, and
  • a third pipe that will be used to share the time spent in the child process.

The first process then issues a write to the first pipe and waits for a read on the second; upon seeing the first process waiting to read from the second pipe, the OS puts the first process in the blocked state and switches to the other process, which reads from the first pipe and then writes to the second. When the second process tries to read from the first pipe again, it blocks, and so the back-and-forth cycle of communication continues. By measuring the cost of such communication repeatedly, you can get a good estimate of the cost of a context switch.

One difficulty in measuring context-switch cost arises on systems with more than one CPU; what you need to do on such a system is ensure that your context-switching processes are located on the same processor. Fortunately, most operating systems have a call to bind a process to a particular processor; on Linux, for example, the sched_setaffinity() call is what you are looking for. By ensuring that both processes are on the same processor, you make sure to measure the cost of the OS stopping one process and restoring another on the same CPU.

I've posted my solution for computing the context switch between processes here.

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sched.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/syscall.h>

/* Bypass glibc's cached PID so the syscall is made every time. */
pid_t getpid( void )
{
    return syscall( __NR_getpid );
}

int main()
{
    /*********************************************************************************************
        To make sure the context-switching processes are located on the same processor:
        1. Bind the process to a particular processor using sched_setaffinity.
        2. Get the maximum priority value (sched_get_priority_max) that can be used with
           the scheduling algorithm identified by policy (SCHED_FIFO).
     *********************************************************************************************/

    cpu_set_t set;
    struct sched_param prio_param;
    int prio_max;

    CPU_ZERO( &set );
    CPU_SET( 0, &set );
    memset( &prio_param, 0, sizeof(struct sched_param) );

    if (sched_setaffinity( getpid(), sizeof( cpu_set_t ), &set ))
    {
        perror( "sched_setaffinity" );
        exit(EXIT_FAILURE);
    }

    if ((prio_max = sched_get_priority_max(SCHED_FIFO)) < 0)
    {
        perror("sched_get_priority_max");
    }

    prio_param.sched_priority = prio_max;
    if (sched_setscheduler(getpid(), SCHED_FIFO, &prio_param) < 0)
    {
        perror("sched_setscheduler");
        exit(EXIT_FAILURE);
    }

    /*****************************************************************************************************
        1. Create the pipes, then fork. The parent and child read and write strings through the
           first two pipes, forcing a context switch on every read that blocks.
        2. The parent records a start timestamp (gettimeofday) before its first write; after the
           last exchange the child records an end timestamp and sends it back through the third
           pipe. The difference between the two timestamps spans roughly n * 2 context switches.
     *****************************************************************************************************/

    int     ret = -1;
    int     firstpipe[2];
    int     secondpipe[2];
    int     timepipe[2];
    int     nbytes;
    char    string[] = "Hello, world!\n";
    char    temp[] = "Sumit Gemini!\n";
    char    readbuffer[80];
    char    tempbuffer[80];
    struct  timeval start, end;

    // Create an unnamed first pipe
    if (pipe(firstpipe) == -1)
    {
        fprintf(stderr, "parent: Failed to create pipe\n");
        return -1;
    }

    // Create an unnamed second pipe
    if (pipe(secondpipe) == -1)
    {
        fprintf(stderr, "parent: Failed to create second pipe\n");
        return -1;
    }

    // Create an unnamed time pipe through which the child reports its end timestamp
    if (pipe(timepipe) == -1)
    {
        fprintf(stderr, "parent: Failed to create time pipe\n");
        return -1;
    }

    if ((ret = fork()) == -1)
        perror("fork");
    else if (ret == 0)
    {
        int n;
        printf("Child  ----> %d\n", getpid());

        for (n = 0; n < 5; n++)
        {
            nbytes = read(firstpipe[0], readbuffer, sizeof(readbuffer));
            printf("Received string: %s", readbuffer);
            write(secondpipe[1], temp, strlen(temp) + 1);
        }

        gettimeofday(&end, 0);
        n = sizeof(struct timeval);

        if (write(timepipe[1], &end, sizeof(struct timeval)) != n)
        {
            fprintf(stderr, "child: Failed to write in time pipe\n");
            exit(EXIT_FAILURE);
        }
    }
    else
    {
        double switch_time;
        int n;
        printf("Parent ----> %d\n", getpid());
        gettimeofday(&start, 0);

        /* Bounce a string back and forth through the pipes */
        for (n = 0; n < 5; n++)
        {
            write(firstpipe[1], string, strlen(string) + 1);
            read(secondpipe[0], tempbuffer, sizeof(tempbuffer));
            printf("Received temp: %s", tempbuffer);
        }

        n = sizeof(struct timeval);
        if (read(timepipe[0], &end, sizeof(struct timeval)) != n)
        {
            fprintf(stderr, "Parent: Failed to read from time pipe\n");
            exit(EXIT_FAILURE);
        }

        wait(NULL);
        switch_time = ((end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec)) / 1000.0;
        printf("context switch between two processes: %0.6lfms\n", switch_time / (5 * 2));
    }

    return 0;
}

Answer 5: (score: -2)

Why not just use this as a rough estimate?

#include <cstdio>
#include <sys/time.h>
#include <unistd.h>

int main(int argc, char **argv) {
        struct timeval tv, tvt;
        int diff;
        gettimeofday(&tv, 0);
        if (fork() != 0) {
                // Parent: the time elapsed across fork() approximates the
                // cost of creating and switching to the child.
                gettimeofday(&tvt, 0);
                diff = tvt.tv_usec - tv.tv_usec;
                printf("%d\n", diff);
        }
        return 0;
}

Note: actually we shouldn't pass null as the second argument - check man gettimeofday. Also, we should check whether tvt.tv_usec > tv.tv_usec! This is just a draft.