Poll函数源码剖析

I/O复用函数——poll

poll是select的升级版:它去除了select最多只能监听1024个描述符的限制,并且不再像select那样用三个位图(读、写、异常)来描述事件,而是统一用一个struct pollfd数组(以指针形式传入)来实现。

源码部分

/*
 * sys_poll - kernel-side implementation of the poll() system call.
 *
 * @ufds:    user-space array of struct pollfd entries
 * @nfds:    number of entries in @ufds
 * @timeout: caller-supplied timeout; 0 is passed through unchanged, any
 *           other value is rescaled below before being handed to do_poll()
 *
 * Copies the pollfd array into a kernel-side linked list of poll_list
 * chunks, runs do_poll() over that list, then writes every entry's revents
 * field back to user space.
 *
 * Returns the value produced by do_poll(), except: -EINTR when that value
 * is 0 and a signal is pending, -EINVAL when nfds fails the sanity check,
 * -ENOMEM when a chunk cannot be allocated, and -EFAULT when copying from
 * or to user space fails.
 */
asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long timeout)
{
    struct poll_wqueues table;
     int fdcount, err;
     unsigned int i;
    struct poll_list *head;
     struct poll_list *walk;

    /* Do a sanity check on nfds ... */
    // Note the &&: nfds is rejected only when it exceeds BOTH the current
    // process's files->max_fdset AND OPEN_MAX.
    if (nfds > current->files->max_fdset && nfds > OPEN_MAX)
        return -EINVAL;

    // Rescale a non-zero timeout: multiply by HZ, divide by 1000 rounding
    // up, then add one.
    // NOTE(review): this looks like a milliseconds-to-jiffies conversion — confirm.
    if (timeout) {
        /* Careful about overflow in the intermediate values */
        if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
            timeout = (unsigned long)(timeout*HZ+999)/1000+1;
        else /* Negative or overflow */
            timeout = MAX_SCHEDULE_TIMEOUT;
    }

    poll_initwait(&table); // initialise the poll_wqueues table

    head = NULL;
    walk = NULL;
    i = nfds;
    // Pre-set the error so the kmalloc failure path can jump straight to cleanup.
    err = -ENOMEM;
    // i counts the entries that still have to be copied in. Each pass
    // allocates one poll_list node holding at most POLLFD_PER_PAGE entries,
    // appends it to the list and fills it from user space.
    while(i!=0) {
        struct poll_list *pp;
        pp = kmalloc(sizeof(struct poll_list)+
                sizeof(struct pollfd)*(i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i),// room for the entries array
                    GFP_KERNEL);// node header plus this chunk's pollfd entries
        if(pp==NULL)
            goto out_fds;
        pp->next=NULL;
        pp->len = (i>POLLFD_PER_PAGE?POLLFD_PER_PAGE:i);
        if (head == NULL)
            head = pp;
        else
            walk->next = pp;

        walk = pp;
        // Copy this chunk from user space; ufds + nfds - i is the first
        // entry that has not been copied yet.
        if (copy_from_user(pp->entries, ufds + nfds-i, 
                sizeof(struct pollfd)*pp->len)) {
            err = -EFAULT;
            goto out_fds;
        }
        i -= pp->len;
    }
    /*
     * At this point the linked list is complete: every node is a struct
     * poll_list whose entries[] array holds a slice of the caller's pollfd
     * array (presumably about one page per node, going by the
     * POLLFD_PER_PAGE name). The loop above exists to bring the pollfd
     * entries into kernel memory via copy_from_user().
     * Usually only a handful of fds are watched and a single node suffices,
     * but with a large fd set this per-call copying and allocation is a
     * significant part of poll()'s cost.
     */
    fdcount = do_poll(nfds, head, &table, timeout);

    // Polling is done; copy the revents results back to user space.
    // ufds is advanced in lockstep with the kernel-side entries, and only
    // the revents field of each user entry is written.
    walk = head;
    err = -EFAULT;
    while(walk != NULL) {
        struct pollfd *fds = walk->entries;
        int j;

        for (j=0; j < walk->len; j++, ufds++) {
            if(__put_user(fds[j].revents, &ufds->revents))
                goto out_fds;
        }
        walk = walk->next;
      }
    err = fdcount;
    // Nothing reported but a signal is pending: report the interruption instead of 0.
    if (!fdcount && signal_pending(current))
        err = -EINTR;
out_fds:
    // Common exit path: free every list node (the next pointer is saved
    // before kfree) and tear down the wait table.
    walk = head;
    while(walk!=NULL) {
        struct poll_list *pp = walk->next;
        kfree(walk);
        walk = pp;
    }
    poll_freewait(&table);
    return err;
}

再进入do_poll:

/*
 * do_poll - repeatedly scan every pollfd chunk until something is reported,
 * the timeout runs out, a signal is pending, or the wait table holds an error.
 *
 * @nfds:    not referenced in this function body
 * @list:    head of the poll_list chain to scan
 * @wait:    wait table; &wait->pt is passed to the first scan only and
 *           wait->error is checked after every scan that reports nothing
 * @timeout: remaining time to wait; 0 means a single scan without sleeping
 *
 * Returns the count accumulated by do_pollfd(), or wait->error when that is
 * non-zero and no entry reported anything.
 */
static int do_poll(unsigned int nfds,  struct poll_list *list,
            struct poll_wqueues *wait, long timeout)
{
    int count = 0;
    poll_table* pt = &wait->pt;

    // With a zero timeout we never sleep, so pass a NULL poll_table
    // (poll_wait() does nothing when its table argument is NULL).
    if (!timeout)
        pt = NULL;
 
    for (;;) {
        struct poll_list *walk;
        // NOTE(review): the task state is set before the scan, presumably so
        // that a wakeup arriving between the scan and schedule_timeout() is
        // not lost — confirm.
        set_current_state(TASK_INTERRUPTIBLE);
        walk = list;
        // Every pass visits every entry of every chunk, so the cost of each
        // pass grows with the number of fds passed in.
        while(walk != NULL) {
            do_pollfd( walk->len, walk->entries, &pt, &count);
            walk = walk->next;
        }
        // Only the first pass gets a non-NULL table; later passes just re-check.
        pt = NULL;
        // Stop when something was reported, no waiting was requested (or the
        // timeout has run out), or a signal is pending.
        if (count || !timeout || signal_pending(current))
            break;
        count = wait->error;
        if (count)
            break;// an error recorded in the wait table ends the loop; it becomes the return value
        // Sleep; the value returned by schedule_timeout() becomes the new
        // remaining timeout, which the !timeout test above checks on the next pass.
        timeout = schedule_timeout(timeout);
    }
    __set_current_state(TASK_RUNNING);
    return count;
}

核心则是do_pollfd:

/*
 * do_pollfd - evaluate one chunk of pollfd entries.
 *
 * @num:    number of entries in @fdpage
 * @fdpage: array of pollfd entries; each entry's revents is overwritten
 * @pwait:  in/out poll_table pointer; reset to NULL as soon as any entry
 *          reports a non-zero mask
 * @count:  incremented once for every entry with a non-zero mask
 *
 * Entries with a negative fd are skipped and get revents = 0. An fd that
 * fget() cannot resolve yields POLLNVAL. Otherwise the file's poll method
 * (or DEFAULT_POLLMASK when there is none) supplies the mask, filtered down
 * to the requested events plus POLLERR and POLLHUP.
 */
static void do_pollfd(unsigned int num, struct pollfd * fdpage,
    poll_table ** pwait, int *count)
{
    int idx;

    for (idx = 0; idx < num; idx++) {
        struct pollfd *entry = &fdpage[idx];
        unsigned int ready = 0;
        struct file *filp;
        int fd = entry->fd;

        /* Negative descriptors are ignored: report nothing for them. */
        if (fd < 0) {
            entry->revents = ready;
            continue;
        }

        filp = fget(fd);
        if (filp == NULL) {
            /* The descriptor does not resolve to a file. */
            ready = POLLNVAL;
        } else {
            /*
             * The file's poll method receives the current poll_table,
             * which may already be NULL.
             */
            if (filp->f_op && filp->f_op->poll)
                ready = filp->f_op->poll(filp, *pwait);
            else
                ready = DEFAULT_POLLMASK;

            /* Keep only what was asked for, plus error and hang-up. */
            ready &= entry->events | POLLERR | POLLHUP;
            fput(filp);
        }

        if (ready) {
            /* Something is reportable: stop passing the table on, count it. */
            *pwait = NULL;
            (*count)++;
        }
        entry->revents = ready;
    }
}

来看看poll_wait()函数,各个文件的poll方法最终就是通过它把当前进程挂到等待队列上的:

/*
 * poll_wait - invoke the callback stored in the poll_table.
 *
 * Does nothing when either the table @p or @wait_address is NULL;
 * otherwise calls p->qproc(filp, wait_address, p).
 */
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
    if (!p || !wait_address)
        return;

    p->qproc(filp, wait_address, p);
}

那么我们就可以回顾一下sys_poll的流程了:

  1. 先注册回调函数__pollwait
  2. 初始化table变量(类型为struct poll_wqueues)
  3. 拷贝用户传入的struct pollfd
  4. 然后轮流调用所有fd对应的poll,也就是把自己挂到所有fd的对应等待队列上。此时设备收到一条消息后或者填写完文件数据后,会唤醒设备等待队列上的进程,这时current便被唤醒。

源码看到这里,总结一下poll的流程:

  1. 首先是创建pollfd,定义struct pollfd event;
  2. 对event先进行清空,然后对其event.fd,event.events进行设置
    Poll函数源码剖析
  3. int poll(struct pollfd *fds, nfds_t nfds, int timeout);
    随后便是调用poll函数,poll(&event, 1, -1),-1指的是无限等待

    • 返回-1,表示poll出错
    • 返回0,表示超时
    • 返回值大于0,表示有几个fd就绪
  4. 完成poll后对fd进行检验,随后对event中的revents进行检验

    for(int i = 0; i < nfds; ++i)//要遍历整个数组:ret只是就绪fd的个数,就绪的项不一定排在数组最前面
     {
          if(event[i].revents & POLLERR)//设备出错
          ……
          ……
          ……
          if(event[i].revents & POLLIN)//读事件
          ……
          ……
          ……
     }

图片引自:https://blog.csdn.net/lianghe...

相关推荐