DPDK igb_uio驅動分析

本文整理下之前的學習筆記,基于DPDK 17.11版本源碼分析。主要分析一下igb_uio驅動源碼。

總線-設備-驅動

首先簡單介紹一下kernel中的總線-設備-驅動模型,以pci總線為例,pci總線上有兩個表,一個用于保存系統中的pci設備,一個用于保存pci設備對應的驅動。每當加載pci設備驅動時,就會遍歷pci總線上的pci設備進行匹配,每當插入pci設備到系統中時,熱插拔機制就會自動遍歷pci總線上的pci設備驅動進行匹配,如果匹配成功則使用此驅動初始化設備。

注冊pci總線
可以調用bus_register注冊總線。比如下面的pci總線,平臺總線和usb總線等。

// Register the PCI bus
struct bus_type pci_bus_type = {
    .name       = "pci",
    .match      = pci_bus_match,
    .uevent     = pci_uevent,
    .probe      = pci_device_probe,
    .remove     = pci_device_remove,
    .shutdown   = pci_device_shutdown,
    .dev_groups = pci_dev_groups,
    .bus_groups = pci_bus_groups,
    .drv_groups = pci_drv_groups,
    .pm     = PCI_PM_OPS_PTR,
};
bus_register(&pci_bus_type);

// Register the platform bus
struct bus_type platform_bus_type = {
    .name       = "platform",
    .dev_groups = platform_dev_groups,
    .match      = platform_match,
    .uevent     = platform_uevent,
    .pm     = &platform_dev_pm_ops,
};
bus_register(&platform_bus_type);

// Register the USB bus
struct bus_type usb_bus_type = {
    .name =     "usb",
    .match =    usb_device_match,
    .uevent =   usb_uevent,
};
bus_register(&usb_bus_type);

// Register the virtio bus
static struct bus_type virtio_bus = {
    .name  = "virtio",
    .match = virtio_dev_match,
    .dev_groups = virtio_dev_groups,
    .uevent = virtio_uevent,
    .probe = virtio_dev_probe,
    .remove = virtio_dev_remove,
};
bus_register(&virtio_bus);

注冊總線后,會在 /sys/bus 下生成總線目錄,比如 pci 總線會生成目錄 /sys/bus/pci。

/**
 * bus_register - register a driver-core subsystem
 * @bus: bus to register
 *
 * Once we have that, we register the bus with the kobject
 * infrastructure, then register the children subsystems it has:
 * the devices and drivers that belong to the subsystem.
 */
// Excerpt of bus_register(): braces, error handling and the return are elided.
int bus_register(struct bus_type *bus)
    struct subsys_private *priv;
    struct lock_class_key *key = &bus->lock_key;

    priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL);
    priv->bus = bus;
    bus->p = priv;
    kobject_set_name(&priv->subsys.kobj, "%s", bus->name);
    priv->subsys.kobj.kset = bus_kset;
    priv->subsys.kobj.ktype = &bus_ktype;
    kset_register(&priv->subsys);
    
    // when this is 1, loading a driver automatically probes the devices for a match
    priv->drivers_autoprobe = 1;
    
    bus_create_file(bus, &bus_attr_uevent);
    
    // create the "devices" subdirectory under the bus directory; it holds one subdirectory per pci device
    priv->devices_kset = kset_create_and_add("devices", NULL,
                         &priv->subsys.kobj);
    // create the "drivers" subdirectory under the bus directory; it holds one subdirectory per driver
    priv->drivers_kset = kset_create_and_add("drivers", NULL,
                         &priv->subsys.kobj);
    // this list holds the pci devices discovered on the bus
    klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put);
    // this list holds the pci device drivers registered on the bus
    klist_init(&priv->klist_drivers, NULL, NULL);
    
    // create the drivers_probe and drivers_autoprobe files in sysfs
    add_probe_files(bus);
        bus_create_file(bus, &bus_attr_drivers_probe);
        bus_create_file(bus, &bus_attr_drivers_autoprobe);
    
    bus_add_groups(bus, bus->bus_groups);

注冊總線后,會生成文件 /sys/bus/pci/drivers_autoprobe,寫此文件時在kernel中會調用如下函數。如果寫入內容的首字符不是'0'(例如寫入1),表示 bus 支持自動探測 device,則加載驅動時,自動遍歷所有pci設備進行匹配;寫入0則關閉自動探測。

store_drivers_autoprobe
/*
 * Store handler behind the bus's drivers_autoprobe attribute.
 *
 * A leading '0' in @buf switches autoprobe off; any other first character
 * switches it on. The whole write is always consumed: returns @count.
 */
static ssize_t store_drivers_autoprobe(struct bus_type *bus,
                       const char *buf, size_t count)
{
    bus->p->drivers_autoprobe = (buf[0] == '0') ? 0 : 1;
    return count;
}

注冊驅動到pci總線
結構體struct pci_driver表示一個pci設備驅動,其中id_table和dynids用來保存此驅動支持的設備id等信息,如果有匹配的設備,則調用probe函數。

struct pci_driver {
    struct list_head node;
    const char *name;
    // static table holding the ids this driver supports
    const struct pci_device_id *id_table;   /* must be non-NULL for probe to be called */
    int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);   /* New device inserted */
    void (*remove) (struct pci_dev *dev);   /* Device removed (NULL if not a hot-plug capable driver) */
    int  (*suspend) (struct pci_dev *dev, pm_message_t state);  /* Device suspended */
    int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
    int  (*resume_early) (struct pci_dev *dev);
    int  (*resume) (struct pci_dev *dev);                   /* Device woken up */
    void (*shutdown) (struct pci_dev *dev);
    int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */
    const struct pci_error_handlers *err_handler;
    struct device_driver    driver;
    // dynamic table; ids are added at runtime by writing the new_id file
    struct pci_dynids dynids;
};

調用函數pci_register_driver注冊pci設備驅動。

static struct pci_driver igbuio_pci_driver = {
    .name = "igb_uio",
    .id_table = NULL,  // id_table is NULL by default for the drivers DPDK uses (igb_uio, vfio-pci, ...)
    .probe = igbuio_pci_probe,
    .remove = igbuio_pci_remove,
};
pci_register_driver(&igbuio_pci_driver);


/* Static id table of the igb driver (excerpt; the remaining entries are elided). */
static const struct pci_device_id igb_pci_tbl[] = {
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
    { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
    ...
}

static struct pci_driver igb_driver = {
    .name     = igb_driver_name,
    .id_table = igb_pci_tbl,  // a regular kernel driver ships a static id_table
    .probe    = igb_probe,
    .remove   = igb_remove,
#ifdef CONFIG_PM
    .driver.pm = &igb_pm_ops,
#endif
    .shutdown = igb_shutdown,
    .sriov_configure = igb_pci_sriov_configure,
    .err_handler = &igb_err_handler
};
pci_register_driver(&igb_driver);

注冊驅動后,會在/sys/bus/pci/drivers目錄下創建以驅動名字命名的目錄,并在此目錄下創建new_id, bind和unbind等sys文件,可以通過這些文件動態修改驅動信息。

/*
 * pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded
 */
#define pci_register_driver(driver)     \
    __pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)

/*
 * Annotated call trace: the indented lines below each call show an excerpt
 * of the callee's body, so this is not compilable as written.
 */
int __pci_register_driver(struct pci_driver *drv, struct module *owner,
              const char *mod_name)
{
    /* initialize common driver fields */
    drv->driver.name = drv->name;
    // the bus is always pci_bus_type
    drv->driver.bus = &pci_bus_type;
    drv->driver.owner = owner;
    drv->driver.mod_name = mod_name;

    spin_lock_init(&drv->dynids.lock);
    INIT_LIST_HEAD(&drv->dynids.list);

    /* register with core */
    driver_register(&drv->driver);
        bus_add_driver(drv);
            struct bus_type *bus;
            struct driver_private *priv;
            
            bus = bus_get(drv->bus);
            priv = kzalloc(sizeof(*priv), GFP_KERNEL);
            klist_init(&priv->klist_devices, NULL, NULL);
            priv->driver = drv;
            drv->p = priv;
            priv->kobj.kset = bus->p->drivers_kset;
            kobject_init_and_add(&priv->kobj, &driver_ktype, NULL, "%s", drv->name);

            // add the driver to the pci bus's driver list
            klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers);

            // if autoprobe is enabled on the pci bus, walk all pci devices and try to match them when the driver is loaded
            if (drv->bus->p->drivers_autoprobe) {
                driver_attach(drv);
                    // iterate over all pci devices and match each one against drv
                    bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
                        // match the device against the driver
                        driver_match_device(drv, dev)
                        // on a match, and if the device has no driver bound yet, probe it with drv
                        if (!dev->driver)
                            driver_probe_device(drv, dev);
            }

            module_add_driver(drv->owner, drv);

            driver_create_file(drv, &driver_attr_uevent);

            // bus->drv_groups is pci_drv_groups:
            // creates the new_id and remove_id files in sysfs
            driver_add_groups(drv, bus->drv_groups);
            
            // create the bind and unbind files in sysfs, used to bind a device to / unbind it from the driver
            if (!drv->suppress_bind_attrs) {
                add_bind_files(drv);
                    driver_create_file(drv, &driver_attr_unbind);
                    driver_create_file(drv, &driver_attr_bind);
            }
}

向new_id寫入"0x8086 0x1521"信息(0x8086表示vendor id,0x1521為device id)時,會調用kernel中的store_new_id,解析相關字段后,保存到動態鏈表dynids,然后遍歷當前所有的pci設備進行匹配。

// defines struct driver_attribute driver_attr_new_id
static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
// defines struct driver_attribute driver_attr_remove_id
static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id);

// attribute array from which ATTRIBUTE_GROUPS(pci_drv) below defines pci_drv_groups
static struct attribute *pci_drv_attrs[] = {
    &driver_attr_new_id.attr,
    &driver_attr_remove_id.attr,
    NULL,
};
ATTRIBUTE_GROUPS(pci_drv);

// Excerpt of the new_id store handler (declarations and braces elided); the
// lines indented under pci_add_dynid() show an excerpt of that callee's body.
static ssize_t store_new_id(struct device_driver *driver, const char *buf,size_t count)
    fields = sscanf(buf, "%x %x %x %x %x %x %lx",
            &vendor, &device, &subvendor, &subdevice,
            &class, &class_mask, &driver_data);
    // fewer than two parsed fields (vendor and device) is rejected
    if (fields < 2)
        return -EINVAL;
        
    pci_add_dynid(pdrv, vendor, device, subvendor, subdevice, class, class_mask, driver_data);  
        struct pci_dynid *dynid;

        dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
        dynid->id.vendor = vendor;
        dynid->id.device = device;
        dynid->id.subvendor = subvendor;
        dynid->id.subdevice = subdevice;
        dynid->id.class = class;
        dynid->id.class_mask = class_mask;
        dynid->id.driver_data = driver_data;

        // append the new id to the driver's dynamic id list under its lock
        spin_lock(&drv->dynids.lock);
        list_add_tail(&dynid->node, &drv->dynids.list);
        spin_unlock(&drv->dynids.lock);

        // setting a new id also triggers automatic device matching
        return driver_attach(&drv->driver);

向bind文件寫入網卡的pci地址時,會調用kernel中的bind_store,將此網卡綁定到此驅動。
向unbind文件寫入網卡的pci地址時,會調用kernel中的unbind_store,將此網卡和此驅動解綁。

// defines struct driver_attribute driver_attr_bind; writing the file calls bind_store
static DRIVER_ATTR_WO(bind);
// defines struct driver_attribute driver_attr_unbind; writing the file calls unbind_store
static DRIVER_ATTR_WO(unbind);

/*
 * Manually attach a device to a driver.
 * Note: the driver must want to bind to the device,
 * it is not possible to override the driver's id table.
 *
 * Excerpt: the local declarations, the function braces and the final
 * cleanup/return are not shown here.
 */
static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count)
    dev = bus_find_device_by_name(bus, NULL, buf);
    /* only probe a device that exists, has no driver yet and matches drv */
    if (dev && dev->driver == NULL && driver_match_device(drv, dev)) {
        if (dev->parent)    /* Needed for USB */
            device_lock(dev->parent);
        device_lock(dev);
        err = driver_probe_device(drv, dev);
        device_unlock(dev);
        if (dev->parent)
            device_unlock(dev->parent);

        if (err > 0) {
            /* success */
            err = count;
        } else if (err == 0) {
            /* driver didn't accept device */
            err = -ENODEV;
        }
    }
    
    
/*
 * Manually detach a device from its associated driver.
 *
 * @buf names the device to look up on @drv's bus. Returns @count when the
 * device was found bound to @drv and has been released, -ENODEV otherwise.
 */
static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count)
{
    struct bus_type *bus = bus_get(drv->bus);
    struct device *dev = bus_find_device_by_name(bus, NULL, buf);
    int err = -ENODEV;

    /* nothing to release unless the device exists and is bound to drv */
    if (!dev || dev->driver != drv)
        goto out;

    /* Needed for USB: hold the parent's lock across the release */
    if (dev->parent)
        device_lock(dev->parent);
    device_release_driver(dev);
    if (dev->parent)
        device_unlock(dev->parent);
    err = count;

out:
    put_device(dev);
    bus_put(bus);
    return err;
}

發現pci設備
系統啟動時會掃描所有的pci設備,以它們的pci地址為名字創建目錄,并在此目錄下創建相關的sys文件。并且會遍歷所有的pci設備驅動進行匹配。

pci_scan_root_bus
    pci_scan_child_bus(b);
        pci_scan_slot
            pci_scan_single_device
                pci_scan_device
                pci_device_add
                    device_add(&dev->dev);
                        bus_add_device(dev);
                            // bus->dev_groups is pci_dev_groups;
                            // creates the vendor, device, ... sysfs entries under /sys/bus/pci/devices/'pci address'/
                            device_add_groups(dev, bus->dev_groups);
                            // add the device to the pci bus's device list
                            klist_add_tail(&dev->p->knode_bus, &bus->p->klist_devices);

    pci_bus_add_devices
        pci_bus_add_device
            pci_create_sysfs_dev_files(dev);
                // if the pci config space is larger than PCI_CFG_SPACE_SIZE (256 bytes), create the /sys/bus/pci/devices/0000:81:00.0/config file
                // with a size of 4096 bytes
                if (pdev->cfg_size > PCI_CFG_SPACE_SIZE)
                    retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
                else // otherwise the config file is 256 bytes
                    retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);

                // create the resource files; user space can mmap resource0 to operate the NIC registers
                pci_create_resource_files(pdev);
                    // create /sys/bus/pci/devices/0000:81:00.0/resource0 and the other resource files
                    /* Expose the PCI resources from this device as files */
                    for (i = 0; i < PCI_ROM_RESOURCE; i++) {
                        /* skip empty resources */
                        if (!pci_resource_len(pdev, i))
                            continue;

                        retval = pci_create_attr(pdev, i, 0);
                            struct bin_attribute *res_attr;
                            res_attr = kzalloc(sizeof(*res_attr) + name_len, GFP_ATOMIC);
                            sysfs_bin_attr_init(res_attr);
                            if (write_combine) {
                                pdev->res_attr_wc[num] = res_attr;
                                sprintf(res_attr_name, "resource%d_wc", num);
                                res_attr->mmap = pci_mmap_resource_wc;
                            } else {
                                pdev->res_attr[num] = res_attr;
                                sprintf(res_attr_name, "resource%d", num);
                                res_attr->mmap = pci_mmap_resource_uc;
                            }
                            if (pci_resource_flags(pdev, num) & IORESOURCE_IO) {
                                res_attr->read = pci_read_resource_io;
                                res_attr->write = pci_write_resource_io;
                            }
                            res_attr->attr.name = res_attr_name;
                            res_attr->attr.mode = S_IRUSR | S_IWUSR;
                            res_attr->size = pci_resource_len(pdev, num);
                            res_attr->private = &pdev->resource[num];
                            // create the sysfs file
                            sysfs_create_bin_file(&pdev->dev.kobj, res_attr);

                        /* for prefetchable resources, create a WC mappable file */
                        if (!retval && pdev->resource[i].flags & IORESOURCE_PREFETCH)
                            retval = pci_create_attr(pdev, i, 1);
                    }

            // try to match a driver
            device_attach(&dev->dev);
                // walk all drivers and check whether one of them matches this device
                bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
                    // check whether the driver and the device match
                    driver_match_device
                        //pci_bus_match
                        drv->bus->match
                            pci_match_device(pci_drv, pci_dev);
                    // if one matches, call the driver's probe function
                    driver_probe_device
                        really_probe(dev, drv);
                            //pci_device_probe
                            dev->bus->probe
                                __pci_device_probe
                                    pci_call_probe
                                        local_pci_probe
                                            pci_drv->probe(pci_dev, ddi->id);

向設備的driver_override文件寫入驅動名字,表示此設備只能綁定到此驅動。

/* Excerpt of the driver_override store handler (braces, declarations and return elided). */
static ssize_t driver_override_store(struct device *dev,
                     struct device_attribute *attr,
                     const char *buf, size_t count)
    struct pci_dev *pdev = to_pci_dev(dev);
    /* keep a copy of the written string as the device's override driver name */
    driver_override = kstrndup(buf, count, GFP_KERNEL);
    pdev->driver_override = driver_override;

如何匹配?
前面多次提到設備和驅動進行匹配,究竟如何匹配呢?

先看一下用來表示一個pci設備的結構體pci_dev,其中如下幾個成員變量表示此pci設備的類型,一般vendor和device就足夠,vendor表示此設備是哪個廠商的,device表示此設備的類型。

/* Excerpt of struct pci_dev: the identification fields compared by pci_match_one_device(). */
struct pci_dev {
    ...
    unsigned short  vendor;
    unsigned short  device;
    unsigned short  subsystem_vendor;
    unsigned short  subsystem_device;
    unsigned int    class;      /* 3 bytes: (base,sub,prog-if) */
    ...
}

再看一下用來表示設備驅動的pci_driver,其中id_table和dynids用來保存此驅動支持的設備類型,前者是靜態值,后者可以通過驅動目錄下的new_id動態添加。設備類型使用pci_device_id結構體來表示,其成員變量也是vendor,device等信息,和pci_dev中的信息是一樣的,所以可以使用這幾個字段進行匹配。

/* One match entry: the ids (or PCI_ANY_ID wildcards) that a driver accepts. */
struct pci_device_id {
    __u32 vendor;               /* Vendor ID or PCI_ANY_ID */
    __u32 device;               /* Device ID or PCI_ANY_ID */
    __u32 subvendor;            /* Subsystem vendor ID or PCI_ANY_ID */
    __u32 subdevice;            /* Subsystem device ID or PCI_ANY_ID */
    __u32 class;                /* (class,subclass,prog-if) triplet */
    __u32 class_mask;           /* Selects which bits of class are compared */
    kernel_ulong_t driver_data; /* Data private to the driver */
};

struct pci_driver {
    struct pci_device_id *id_table
    struct pci_dynids dynids;
    ...
}

最終使用函數pci_match_device進行驅動和設備的匹配。

/* Wildcard id: vendor, device, subvendor and subdevice are all PCI_ANY_ID. */
static const struct pci_device_id pci_device_id_any = {
    .vendor = PCI_ANY_ID,
    .device = PCI_ANY_ID,
    .subvendor = PCI_ANY_ID,
    .subdevice = PCI_ANY_ID,
};

// Excerpt (braces and declarations elided); the loop indented under
// pci_match_id() shows an excerpt of that callee's body.
static const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev)
    // If the device has driver_override set, it may only bind to the driver named there;
    // for any other driver return NULL right away.
    /* When driver_override is set, only bind to the matching driver */
    if (dev->driver_override && strcmp(dev->driver_override, drv->name))
        return NULL;

    // first match the device against the driver's dynamic id list
    /* Look at the dynamic ids first, before the static ones */
    spin_lock(&drv->dynids.lock);
    list_for_each_entry(dynid, &drv->dynids.list, node) {
        if (pci_match_one_device(&dynid->id, dev)) {
            found_id = &dynid->id;
            break;
        }
    }
    spin_unlock(&drv->dynids.lock);

    // if nothing matched, search the driver's static table
    if (!found_id)
        found_id = pci_match_id(drv->id_table, dev);
            while (ids->vendor || ids->subvendor || ids->class_mask) {
                if (pci_match_one_device(ids, dev))
                    return ids;
                ids++;
            }

    // still no match, but a driver was specified through driver_override:
    // treat it as a match and return pci_device_id_any
    /* driver_override will always match, send a dummy id */
    if (!found_id && dev->driver_override)
        found_id = &pci_device_id_any;

    return found_id;

/*
 * The actual matching rule: compare one id entry against a device.
 *
 * Each of vendor, device, subvendor and subdevice matches when the entry
 * holds PCI_ANY_ID or equals the device's value; the class matches when no
 * bit selected by class_mask differs. Returns @id on a match, NULL otherwise.
 */
static inline const struct pci_device_id *
pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
    if (id->vendor != PCI_ANY_ID && id->vendor != dev->vendor)
        return NULL;
    if (id->device != PCI_ANY_ID && id->device != dev->device)
        return NULL;
    if (id->subvendor != PCI_ANY_ID && id->subvendor != dev->subsystem_vendor)
        return NULL;
    if (id->subdevice != PCI_ANY_ID && id->subdevice != dev->subsystem_device)
        return NULL;
    /* any differing bit under class_mask is a mismatch */
    if ((id->class ^ dev->class) & id->class_mask)
        return NULL;
    return id;
}

綁定到 igb_uio 驅動

網卡如何綁定到igb_uio驅動呢?這里拿DPDK提供的腳本文件dpdk-devbind.py中的函數bind_one進行分析。

def bind_one(dev_id, driver, force):
    '''Bind the device given by "dev_id" to the driver "driver". If the device
    is already bound to a different driver, it will be unbound first'''
    dev = devices[dev_id]
    saved_driver = None  # used to rollback any unbind in case of failure

    # If the NIC is already bound to a driver, check whether it is the one
    # requested: if so return, otherwise unbind the previous driver first.
    # (unbind_one is expected to do this by writing the NIC's PCI address to
    # the old driver's unbind file.)
    # unbind any existing drivers we don't want
    if has_driver(dev_id):
        if dev["Driver_str"] == driver:
            print("%s already bound to driver %s, skipping\n"
                  % (dev_id, driver))
            return
        else:
            saved_driver = dev["Driver_str"]
            unbind_one(dev_id, force)
            dev["Driver_str"] = ""  # clear driver string

    # How the bind is done depends on the kernel version.
    # Kernels >= 3.15: first write the driver name into the NIC's
    # driver_override file to select this driver.
    # Kernels < 3.15: write the NIC's vendor and device id into the driver's
    # new_id file.
    # new_id is avoided on >= 3.15 because there it does not only add the
    # device type to the driver's dynamic id list, it also walks all devices
    # and binds every device of that type to the driver - binding all NICs of
    # the same model when only one was wanted.
    # For kernels >= 3.15 driver_override can be used to specify the driver
    # for a device rather than relying on the driver to provide a positive
    # match of the device.  The existing process of looking up
    # the vendor and device ID, adding them to the driver new_id,
    # will erroneously bind other devices too which has the additional burden
    # of unbinding those devices
    if driver in dpdk_drivers:
        filename = "/sys/bus/pci/devices/%s/driver_override" % dev_id
        if os.path.exists(filename):
            try:
                f = open(filename, "w")
            except:
                print("Error: bind failed for %s - Cannot open %s"
                      % (dev_id, filename))
                return
            try:
                f.write("%s" % driver)
                f.close()
            except:
                print("Error: bind failed for %s - Cannot write driver %s to "
                      "PCI ID " % (dev_id, driver))
                return
        # For kernels < 3.15 use new_id to add PCI id's to the driver
        else:
            filename = "/sys/bus/pci/drivers/%s/new_id" % driver
            try:
                f = open(filename, "w")
            except:
                print("Error: bind failed for %s - Cannot open %s"
                      % (dev_id, filename))
                return
            try:
                # Convert Device and Vendor Id to int to write to new_id
                f.write("%04x %04x" % (int(dev["Vendor"],16),
                        int(dev["Device"], 16)))
                f.close()
            except:
                print("Error: bind failed for %s - Cannot write new PCI ID to "
                      "driver %s" % (dev_id, driver))
                return

    # Second step: write the NIC's PCI address into the driver's
    # /sys/bus/pci/drivers/%s/bind file, which binds the NIC to the driver.
    # do the bind by writing to /sys
    filename = "/sys/bus/pci/drivers/%s/bind" % driver
    try:
        f = open(filename, "a")
    except:
        print("Error: bind failed for %s - Cannot open %s"
              % (dev_id, filename))
        if saved_driver is not None:  # restore any previous driver
            bind_one(dev_id, saved_driver, force)
        return
    try:
        f.write(dev_id)
        f.close()
    except:
        # for some reason, closing dev_id after adding a new PCI ID to new_id
        # results in IOError. however, if the device was successfully bound,
        # we don't care for any errors and can safely ignore IOError
        tmp = get_pci_device_details(dev_id, True)
        if "Driver_str" in tmp and tmp["Driver_str"] == driver:
            return
        print("Error: bind failed for %s - Cannot bind to driver %s"
              % (dev_id, driver))
        if saved_driver is not None:  # restore any previous driver
            bind_one(dev_id, saved_driver, force)
        return

    # On kernels >= 3.15 the driver_override file must also be cleared
    # afterwards so that the device can later be bound to another driver.
    # For kernels > 3.15 driver_override is used to bind a device to a driver.
    # Before unbinding it, overwrite driver_override with empty string so that
    # the device can be bound to any other driver
    filename = "/sys/bus/pci/devices/%s/driver_override" % dev_id
    if os.path.exists(filename):
        try:
            f = open(filename, "w")
        except:
            print("Error: unbind failed for %s - Cannot open %s"
                  % (dev_id, filename))
            sys.exit(1)
        try:
            f.write("\00")
            f.close()
        except:
            print("Error: unbind failed for %s - Cannot open %s"
                  % (dev_id, filename))
            sys.exit(1)

igb_uio驅動的id_table為空,則在加載此驅動時,是不會匹配到任何設備的。

static struct pci_driver igbuio_pci_driver = {
    .name = "igb_uio",
    .id_table = NULL,  // id_table is NULL by default for the drivers DPDK uses (igb_uio, vfio-pci, ...)
    .probe = igbuio_pci_probe,
    .remove = igbuio_pci_remove,
};

經過上面的分析,有三種方法可以將網卡綁定到驅動igb_uio:

a. 如果kernel版本大于等于3.15,先向網卡的文件 /sys/bus/pci/devices/'pci address'/driver_override 寫入驅動名字igb_uio,再向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/bind寫入網卡的pci地址即可。
b. 如果kernel版本大于等于3.15,向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/new_id寫入網卡的vendor和device id,則會自動將所有此類型并且沒有綁定到任何驅動的網卡綁定到igb_uio。
c. 如果kernel版本小于3.15,先向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/new_id寫入網卡的vendor和device id,再向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/bind寫入網卡的pci地址即可。注意低版本的kernel,在向new_id寫入值時,只會將設備類型添加到此驅動的動態鏈表,而不會自動探測設備。

igb_uio probe
經過前面的分析,網卡綁定到了igb_uio驅動后,會調用驅動的probe函數igbuio_pci_probe,主要做了如下幾個事情:
a. 調用pci_enable_device使能pci設備
b. 設置DMA mask
c. 填充struct uio_info信息,注冊uio設備
d. 注冊中斷處理函數

/* Excerpt of the igb_uio probe function (braces, error handling and return elided). */
static int
igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
    struct rte_uio_pci_dev *udev;
    dma_addr_t map_dma_addr;
    void *map_addr;

    udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
    
    // enable the pci device
    /*
     * enable device: ask low-level code to enable I/O and
     * memory
     */
    pci_enable_device(dev);

    /* enable bus mastering on the device */
    pci_set_master(dev);
    
    // store the device's memory-type BAR info in struct uio_info->mem,
    // and its io-type BAR info in struct uio_info->port
    /* remap IO memory */
    igbuio_setup_bars(dev, &udev->info);
    
    /* set 64-bit DMA mask */
    pci_set_dma_mask(dev,  DMA_BIT_MASK(64));
    pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));

    // fill in the remaining struct uio_info fields
    /* fill uio infos */
    udev->info.name = "igb_uio";
    udev->info.version = "0.1";
    udev->info.irqcontrol = igbuio_pci_irqcontrol;
    udev->info.open = igbuio_pci_open;
    udev->info.release = igbuio_pci_release;
    udev->info.priv = udev;
    udev->pdev = dev;
    
    // create the /sys/bus/pci/devices/'pci address'/max_vf file;
    // writing it creates VFs, which shows that VFs can still be created
    // while the NIC is bound to igb_uio.
    sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
    
    // register the uio device: creates the /dev/uiox character device file
    // and the /sys/bus/pci/devices/'pci address'/uio/uiox directory
    /* register uio driver */
    uio_register_device(&dev->dev, &udev->info);

    // save the struct rte_uio_pci_dev in dev->driver_data
    pci_set_drvdata(dev, udev);
        dev_set_drvdata(&pdev->dev, data);
            dev->driver_data = data;

宏uio_register_device用來注冊uio設備。

/* use a define to avoid include chaining to get THIS_MODULE */
#define uio_register_device(parent, info) \
    __uio_register_device(THIS_MODULE, parent, info)

/* Excerpt (braces, error handling and return elided). */
int __uio_register_device(struct module *owner,
              struct device *parent,
              struct uio_info *info)
    // build a uio_device from the uio_info
    struct uio_device *idev;
    idev = devm_kzalloc(parent, sizeof(*idev), GFP_KERNEL);

    idev->owner = owner;
    idev->info = info;
    init_waitqueue_head(&idev->wait);
    atomic_set(&idev->event, 0);

    // allocate the smallest unused id and store it in idev->minor
    uio_get_minor(idev);
    // create the character device /dev/uiox
    idev->dev = device_create(&uio_class, parent, MKDEV(uio_major, idev->minor), idev, "uio%d", idev->minor);
    
    // Create the maps directory under /sys/class/uio/uiox/. Based on struct uio_info->mem and ->port
    // it gets mapx and portx subdirectories holding the matching info, e.g. start address, name, offset and size.
    // User space can mmap the regions described under mapx to operate the NIC registers.
    // DPDK does not use this path; it mmaps /sys/bus/pci/devices/'pci address'/resource0 directly.
    uio_dev_add_attributes(idev);
    info->uio_dev = idev;
    
    // Register the interrupt. In newer DPDK versions info->irq is not assigned when the uio device
    // is registered, so no interrupt is requested here; it is registered in igbuio_pci_open
    // when user space opens /dev/uiox.
    if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
        devm_request_irq(idev->dev, info->irq, uio_interrupt, info->irq_flags, info->name, idev);
    }

簡單總結一下,igb_uio是DPDK使用網卡的一個通用驅動,不只intel網卡可以用,其他廠商的網卡也可以用(有一個例外,mellanox的網卡不用綁定到igb_uio就能被DPDK使用),因為它只使能了pci設備,注冊uio,和注冊中斷處理函數,這些工作是不區分網卡類型的。
加載igb_uio時,不會自動探測pci設備,而是需要寫sys文件將設備綁定到igb_uio。

igb_uio依賴uio驅動,注冊uio設備后,會生成/dev/uiox,和網卡一一對應,用戶態可以poll /dev/uiox監聽中斷是否到來。
同時uio設備還會將網卡的BAR地址通過sys文件系統暴露出去,用戶態可以mmap sys文件后操作網卡寄存器。但是DPDK沒有采用這種方式,而是直接mmap網卡自身暴露出去的sys文件 /sys/bus/pci/devices/'pci address'/resource0。

參考

https://www.cnblogs.com/jungle1996/p/12398915.html
https://www.cnblogs.com/jungle1996/p/12452636.html

最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請(qǐng)聯(lián)系作者
  • 序言:七十年代末,一起剝皮案震驚了整個(gè)濱河市许溅,隨后出現(xiàn)的幾起案子瓤鼻,更是在濱河造成了極大的恐慌,老刑警劉巖贤重,帶你破解...
    沈念sama閱讀 219,039評(píng)論 6 508
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件茬祷,死亡現(xiàn)場(chǎng)離奇詭異,居然都是意外死亡并蝗,警方通過(guò)查閱死者的電腦和手機(jī)祭犯,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 93,426評(píng)論 3 395
  • 文/潘曉璐 我一進(jìn)店門,熙熙樓的掌柜王于貴愁眉苦臉地迎上來(lái)滚停,“玉大人沃粗,你說(shuō)我怎么就攤上這事〖耄” “怎么了最盅?”我有些...
    開封第一講書人閱讀 165,417評(píng)論 0 356
  • 文/不壞的土叔 我叫張陵,是天一觀的道長(zhǎng)起惕。 經(jīng)常有香客問(wèn)我涡贱,道長(zhǎng),這世上最難降的妖魔是什么惹想? 我笑而不...
    開封第一講書人閱讀 58,868評(píng)論 1 295
  • 正文 為了忘掉前任盼产,我火速辦了婚禮,結(jié)果婚禮上勺馆,老公的妹妹穿的比我還像新娘戏售。我一直安慰自己,他們只是感情好草穆,可當(dāng)我...
    茶點(diǎn)故事閱讀 67,892評(píng)論 6 392
  • 文/花漫 我一把揭開白布灌灾。 她就那樣靜靜地躺著,像睡著了一般悲柱。 火紅的嫁衣襯著肌膚如雪锋喜。 梳的紋絲不亂的頭發(fā)上,一...
    開封第一講書人閱讀 51,692評(píng)論 1 305
  • 那天,我揣著相機(jī)與錄音嘿般,去河邊找鬼段标。 笑死,一個(gè)胖子當(dāng)著我的面吹牛炉奴,可吹牛的內(nèi)容都是我干的逼庞。 我是一名探鬼主播,決...
    沈念sama閱讀 40,416評(píng)論 3 419
  • 文/蒼蘭香墨 我猛地睜開眼瞻赶,長(zhǎng)吁一口氣:“原來(lái)是場(chǎng)噩夢(mèng)啊……” “哼赛糟!你這毒婦竟也來(lái)了?” 一聲冷哼從身側(cè)響起砸逊,我...
    開封第一講書人閱讀 39,326評(píng)論 0 276
  • 序言:老撾萬(wàn)榮一對(duì)情侶失蹤璧南,失蹤者是張志新(化名)和其女友劉穎,沒(méi)想到半個(gè)月后师逸,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體司倚,經(jīng)...
    沈念sama閱讀 45,782評(píng)論 1 316
  • 正文 獨(dú)居荒郊野嶺守林人離奇死亡,尸身上長(zhǎng)有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點(diǎn)故事閱讀 37,957評(píng)論 3 337
  • 正文 我和宋清朗相戀三年篓像,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了对湃。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
    茶點(diǎn)故事閱讀 40,102評(píng)論 1 350
  • 序言:一個(gè)原本活蹦亂跳的男人離奇死亡遗淳,死狀恐怖沐祷,靈堂內(nèi)的尸體忽然破棺而出袖外,到底是詐尸還是另有隱情炬太,我是刑警寧澤屏富,帶...
    沈念sama閱讀 35,790評(píng)論 5 346
  • 正文 年R本政府宣布,位于F島的核電站养叛,受9級(jí)特大地震影響种呐,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜弃甥,卻給世界環(huán)境...
    茶點(diǎn)故事閱讀 41,442評(píng)論 3 331
  • 文/蒙蒙 一爽室、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧淆攻,春花似錦阔墩、人聲如沸。這莊子的主人今日做“春日...
    開封第一講書人閱讀 31,996評(píng)論 0 22
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽(yáng)。三九已至伞芹,卻和暖如春忘苛,著一層夾襖步出監(jiān)牢的瞬間蝉娜,已是汗流浹背。 一陣腳步聲響...
    開封第一講書人閱讀 33,113評(píng)論 1 272
  • 我被黑心中介騙來(lái)泰國(guó)打工扎唾, 沒(méi)想到剛下飛機(jī)就差點(diǎn)兒被人妖公主榨干…… 1. 我叫王不留召川,地道東北人。 一個(gè)月前我還...
    沈念sama閱讀 48,332評(píng)論 3 373
  • 正文 我出身青樓胸遇,卻偏偏與公主長(zhǎng)得像荧呐,于是被迫代替她去往敵國(guó)和親。 傳聞我的和親對(duì)象是個(gè)殘疾皇子狐榔,可洞房花燭夜當(dāng)晚...
    茶點(diǎn)故事閱讀 45,044評(píng)論 2 355

推薦閱讀更多精彩內(nèi)容