本文整理下之前的學習筆記,基于DPDK17.11版本源碼分析。主要分析一下igb_uio驅動源碼。
總線-設備-驅動
首先簡單介紹一下kernel中的總線-設備-驅動模型,以pci總線為例,pci總線上有兩個表,一個用于保存系統中的pci設備,一個用于保存pci設備對應的驅動。每當加載pci設備驅動時,就會遍歷pci總線上的pci設備進行匹配,每當插入pci設備到系統中時,熱插拔機制就會自動遍歷pci總線上的pci設備驅動進行匹配,如果匹配成功則使用此驅動初始化設備。
注冊pci總線
可以調用bus_register注冊總線。比如下面的pci總線,平臺總線和usb總線等。
// Register the PCI bus
struct bus_type pci_bus_type = {
.name = "pci",
.match = pci_bus_match,
.uevent = pci_uevent,
.probe = pci_device_probe,
.remove = pci_device_remove,
.shutdown = pci_device_shutdown,
.dev_groups = pci_dev_groups,
.bus_groups = pci_bus_groups,
.drv_groups = pci_drv_groups,
.pm = PCI_PM_OPS_PTR,
};
bus_register(&pci_bus_type);
// Register the platform bus
struct bus_type platform_bus_type = {
.name = "platform",
.dev_groups = platform_dev_groups,
.match = platform_match,
.uevent = platform_uevent,
.pm = &platform_dev_pm_ops,
};
bus_register(&platform_bus_type);
// Register the USB bus
struct bus_type usb_bus_type = {
.name = "usb",
.match = usb_device_match,
.uevent = usb_uevent,
};
bus_register(&usb_bus_type);
// Register the virtio bus
static struct bus_type virtio_bus = {
.name = "virtio",
.match = virtio_dev_match,
.dev_groups = virtio_dev_groups,
.uevent = virtio_uevent,
.probe = virtio_dev_probe,
.remove = virtio_dev_remove,
};
bus_register(&virtio_bus)
注冊總線后,會在 /sys/bus 下生成總線目錄,比如 pci 總線會生成目錄 /sys/bus/pci
/**
* bus_register - register a driver-core subsystem
* @bus: bus to register
*
* Once we have that, we register the bus with the kobject
* infrastructure, then register the children subsystems it has:
* the devices and drivers that belong to the subsystem.
*
* NOTE: abridged excerpt - braces, error handling and the return
* statement are omitted, and the body of add_probe_files() is shown
* inline right after the call.
*/
int bus_register(struct bus_type *bus)
struct subsys_private *priv;
struct lock_class_key *key = &bus->lock_key;
priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL);
priv->bus = bus;
bus->p = priv;
kobject_set_name(&priv->subsys.kobj, "%s", bus->name);
priv->subsys.kobj.kset = bus_kset;
priv->subsys.kobj.ktype = &bus_ktype;
kset_register(&priv->subsys);
// When this is 1, loading a driver automatically probes the bus's devices for a match
priv->drivers_autoprobe = 1;
bus_create_file(bus, &bus_attr_uevent);
// Create the "devices" subdirectory under the bus directory; it holds one entry per device
priv->devices_kset = kset_create_and_add("devices", NULL,
&priv->subsys.kobj);
// Create the "drivers" subdirectory under the bus directory; it holds one entry per driver
priv->drivers_kset = kset_create_and_add("drivers", NULL,
&priv->subsys.kobj);
// This list holds the devices found on the bus
klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put);
// This list holds the drivers registered on the bus
klist_init(&priv->klist_drivers, NULL, NULL);
// Create the drivers_probe and drivers_autoprobe files in sysfs
add_probe_files(bus);
bus_create_file(bus, &bus_attr_drivers_probe);
bus_create_file(bus, &bus_attr_drivers_autoprobe);
bus_add_groups(bus, bus->bus_groups);
注冊總線后,會生成文件/sys/bus/pci/drivers_autoprobe,寫此文件時在kernel中會調用如下函數,如果為1,表示 bus 支持自動探測 device,則加載驅動時,自動遍歷所有pci設備進行匹配
store_drivers_autoprobe
/*
 * Store handler for the bus "drivers_autoprobe" attribute.
 *
 * @bus:   bus whose autoprobe flag is updated
 * @buf:   data written by the user; only the first byte is examined
 * @count: number of bytes written
 *
 * Returns @count, i.e. the whole write is always reported as consumed.
 */
static ssize_t store_drivers_autoprobe(struct bus_type *bus,
				       const char *buf, size_t count)
{
	/* A leading '0' turns autoprobe off; any other first byte turns it on. */
	bus->p->drivers_autoprobe = (buf[0] == '0') ? 0 : 1;
	return count;
}
注冊驅動到pci總線
結構體struct pci_driver表示一個pci設備驅動,其中id_table和dynids用來保存此驅動支持的設備id等信息,如果有匹配的設備,則調用probe函數。
/* Describes one PCI device driver: its supported ids and its callbacks. */
struct pci_driver {
struct list_head node;
const char *name;
// Static table holding the ids this driver supports
const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */
int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */
void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */
int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */
int (*suspend_late) (struct pci_dev *dev, pm_message_t state);
int (*resume_early) (struct pci_dev *dev);
int (*resume) (struct pci_dev *dev); /* Device woken up */
void (*shutdown) (struct pci_dev *dev);
int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */
const struct pci_error_handlers *err_handler;
struct device_driver driver;
// Dynamic table; ids are added at run time by writing the new_id file
struct pci_dynids dynids;
};
調用函數pci_register_driver注冊pci設備驅動。
/* igb_uio registers with no static id table (contrast with igb_driver below). */
static struct pci_driver igbuio_pci_driver = {
.name = "igb_uio",
.id_table = NULL, // The id_table of the drivers DPDK uses (igb_uio, vfio-pci, ...) is empty by default
.probe = igbuio_pci_probe,
.remove = igbuio_pci_remove,
};
pci_register_driver(&igbuio_pci_driver);
/* Static id table of the regular in-kernel igb driver (abridged). */
static const struct pci_device_id igb_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
...
}
static struct pci_driver igb_driver = {
.name = igb_driver_name,
.id_table = igb_pci_tbl, // A regular kernel driver has a static id_table
.probe = igb_probe,
.remove = igb_remove,
#ifdef CONFIG_PM
.driver.pm = &igb_pm_ops,
#endif
.shutdown = igb_shutdown,
.sriov_configure = igb_pci_sriov_configure,
.err_handler = &igb_err_handler
};
pci_register_driver(&igb_driver);
注冊驅動后,會在/sys/bus/pci/drivers目錄下創建以驅動名字命名的目錄,并在此目錄下創建new_id, bind和unbind等sys文件,可以通過這些文件動態修改驅動信息。
/*
* pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded
*/
#define pci_register_driver(driver) \
__pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
/*
* NOTE: what follows is an abridged call trace rather than compilable code:
* the statements after each call (driver_register, bus_add_driver,
* driver_attach, add_bind_files) show what that callee does, inlined.
*/
int __pci_register_driver(struct pci_driver *drv, struct module *owner,
const char *mod_name)
{
/* initialize common driver fields */
drv->driver.name = drv->name;
// The bus is always pci_bus_type
drv->driver.bus = &pci_bus_type;
drv->driver.owner = owner;
drv->driver.mod_name = mod_name;
spin_lock_init(&drv->dynids.lock);
INIT_LIST_HEAD(&drv->dynids.list);
/* register with core */
driver_register(&drv->driver);
bus_add_driver(drv);
struct bus_type *bus;
struct driver_private *priv;
bus = bus_get(drv->bus);
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
klist_init(&priv->klist_devices, NULL, NULL);
priv->driver = drv;
drv->p = priv;
priv->kobj.kset = bus->p->drivers_kset;
kobject_init_and_add(&priv->kobj, &driver_ktype, NULL, "%s", drv->name);
// Add the driver to the bus's driver list
klist_add_tail(&priv->knode_bus, &bus->p->klist_drivers);
// If the bus has autoprobe enabled, walk all of its devices now and try to match them
if (drv->bus->p->drivers_autoprobe) {
driver_attach(drv);
// Walk every device on the bus and try to match it against drv
bus_for_each_dev(drv->bus, NULL, drv, __driver_attach);
// Match the device against the driver
driver_match_device(drv, dev)
// On a match, and only if the device has no driver yet, probe it with drv
if (!dev->driver)
driver_probe_device(drv, dev);
}
module_add_driver(drv->owner, drv);
driver_create_file(drv, &driver_attr_uevent);
// bus->drv_groups is pci_drv_groups;
// this creates the new_id and remove_id files in sysfs
driver_add_groups(drv, bus->drv_groups);
// Create the bind and unbind files in sysfs, used to bind/unbind devices to/from the driver
if (!drv->suppress_bind_attrs) {
add_bind_files(drv);
driver_create_file(drv, &driver_attr_unbind);
driver_create_file(drv, &driver_attr_bind);
}
}
向new_id寫入"0x8086 0x1521"信息(0x8086表示vendor id,0x1521為device id)時,會調用kernel中的store_new_id,解析相關字段后,保存到動態鏈表dynids,然后遍歷當前所有的pci設備進行匹配。
// Defines struct driver_attribute driver_attr_new_id
static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
// Defines struct driver_attribute driver_attr_remove_id
static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id);
// Defines struct attribute_group pci_drv_groups
static struct attribute *pci_drv_attrs[] = {
&driver_attr_new_id.attr,
&driver_attr_remove_id.attr,
NULL,
};
ATTRIBUTE_GROUPS(pci_drv);
/*
* Store handler for the new_id file (abridged; the body of pci_add_dynid()
* is shown inline after the call). Parses up to seven hex fields, requires
* at least vendor and device, appends the id to the driver's dynids list
* and then calls driver_attach().
*/
static ssize_t store_new_id(struct device_driver *driver, const char *buf,size_t count)
fields = sscanf(buf, "%x %x %x %x %x %x %lx",
&vendor, &device, &subvendor, &subdevice,
&class, &class_mask, &driver_data);
if (fields < 2)
return -EINVAL;
pci_add_dynid(pdrv, vendor, device, subvendor, subdevice, class, class_mask, driver_data);
struct pci_dynid *dynid;
dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
dynid->id.vendor = vendor;
dynid->id.device = device;
dynid->id.subvendor = subvendor;
dynid->id.subdevice = subdevice;
dynid->id.class = class;
dynid->id.class_mask = class_mask;
dynid->id.driver_data = driver_data;
spin_lock(&drv->dynids.lock);
list_add_tail(&dynid->node, &drv->dynids.list);
spin_unlock(&drv->dynids.lock);
// Adding a new id also triggers matching against the devices
return driver_attach(&drv->driver);
向bind文件寫入網卡的pci地址時,會調用kernel中的bind_store,將此網卡綁定到此驅動。
向unbind文件寫入網卡的pci地址時,會調用kernel中的unbind_store,將此網卡和此驅動解綁。
// Defines struct driver_attribute driver_attr_bind; writing the file calls bind_store
static DRIVER_ATTR_WO(bind);
// Defines struct driver_attribute driver_attr_unbind; writing the file calls unbind_store
static DRIVER_ATTR_WO(unbind);
/*
* Manually attach a device to a driver.
* Note: the driver must want to bind to the device,
* it is not possible to override the driver's id table.
*
* (Abridged excerpt: declarations, the function braces and the final
* cleanup/return are omitted.)
*/
static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count)
dev = bus_find_device_by_name(bus, NULL, buf);
/* Only proceed if the device exists, is currently unbound, and matches drv. */
if (dev && dev->driver == NULL && driver_match_device(drv, dev)) {
if (dev->parent) /* Needed for USB */
device_lock(dev->parent);
device_lock(dev);
err = driver_probe_device(drv, dev);
device_unlock(dev);
if (dev->parent)
device_unlock(dev->parent);
if (err > 0) {
/* success */
err = count;
} else if (err == 0) {
/* driver didn't accept device */
err = -ENODEV;
}
}
/*
 * Manually detach a device from its associated driver.
 *
 * @drv:   driver whose "unbind" file was written
 * @buf:   name of the device to look up on the driver's bus
 * @count: number of bytes written
 *
 * Returns @count when the named device was bound to @drv and has been
 * released, or -ENODEV when the device is not found or is bound to a
 * different driver.
 */
static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count)
{
	struct bus_type *bus = bus_get(drv->bus);
	struct device *dev = bus_find_device_by_name(bus, NULL, buf);
	int err;

	if (!dev || dev->driver != drv) {
		/* Nothing to release for this driver. */
		err = -ENODEV;
		goto out;
	}

	if (dev->parent)	/* Needed for USB */
		device_lock(dev->parent);
	device_release_driver(dev);
	if (dev->parent)
		device_unlock(dev->parent);
	err = count;

out:
	/* Both puts run on every path, mirroring the lookups above. */
	put_device(dev);
	bus_put(bus);
	return err;
}
發現pci設備
系統啟動時會掃描所有的pci設備,以他們的pci地址為名字創建目錄,并在此目錄下創建相關的sys文件。并且會遍歷所有的pci設備驅動進行匹配。
/*
* Call trace of PCI device discovery (pseudo-code, not compilable):
* each line is a call made by the line above it, with selected callee
* bodies shown inline.
*/
pci_scan_root_bus
pci_scan_child_bus(b);
pci_scan_slot
pci_scan_single_device
pci_scan_device
pci_device_add
device_add(&dev->dev);
bus_add_device(dev);
// bus->dev_groups is pci_dev_groups;
// this creates entries such as vendor and device under /sys/bus/pci/devices/'pci address'/
device_add_groups(dev, bus->dev_groups);
// Add the device to the bus's device list
klist_add_tail(&dev->p->knode_bus, &bus->p->klist_devices);
pci_bus_add_devices
pci_bus_add_device
pci_create_sysfs_dev_files(dev);
// If the PCI config space is larger than PCI_CFG_SPACE_SIZE (256 bytes), create
// /sys/bus/pci/devices/0000:81:00.0/config with a size of 4096 bytes
if (pdev->cfg_size > PCI_CFG_SPACE_SIZE)
retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
else // otherwise the config file is 256 bytes
retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
// Create the resource files; user space can mmap resource0 to access the NIC registers
pci_create_resource_files(pdev);
// Create /sys/bus/pci/devices/0000:81:00.0/resource0 and the other resourceN files
/* Expose the PCI resources from this device as files */
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
/* skip empty resources */
if (!pci_resource_len(pdev, i))
continue;
retval = pci_create_attr(pdev, i, 0);
struct bin_attribute *res_attr;
res_attr = kzalloc(sizeof(*res_attr) + name_len, GFP_ATOMIC);
sysfs_bin_attr_init(res_attr);
if (write_combine) {
pdev->res_attr_wc[num] = res_attr;
sprintf(res_attr_name, "resource%d_wc", num);
res_attr->mmap = pci_mmap_resource_wc;
} else {
pdev->res_attr[num] = res_attr;
sprintf(res_attr_name, "resource%d", num);
res_attr->mmap = pci_mmap_resource_uc;
}
if (pci_resource_flags(pdev, num) & IORESOURCE_IO) {
res_attr->read = pci_read_resource_io;
res_attr->write = pci_write_resource_io;
}
res_attr->attr.name = res_attr_name;
res_attr->attr.mode = S_IRUSR | S_IWUSR;
res_attr->size = pci_resource_len(pdev, num);
res_attr->private = &pdev->resource[num];
// Create the sysfs binary file for this resource
sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
/* for prefetchable resources, create a WC mappable file */
if (!retval && pdev->resource[i].flags & IORESOURCE_PREFETCH)
retval = pci_create_attr(pdev, i, 1);
}
// Try to match a driver
device_attach(&dev->dev);
// Walk all drivers and look for one that matches this device
bus_for_each_drv(dev->bus, NULL, dev, __device_attach);
// Check whether the driver and the device match
driver_match_device
//pci_bus_match
drv->bus->match
pci_match_device(pci_drv, pci_dev);
// If one matches, call the driver's probe function
driver_probe_device
really_probe(dev, drv);
//pci_device_probe
dev->bus->probe
__pci_device_probe
pci_call_probe
local_pci_probe
pci_drv->probe(pci_dev, ddi->id);
向設備的driver_override文件寫入驅動名字,表示此設備只能綁定到此驅動。
/*
* Store handler for a device's driver_override file (abridged excerpt:
* braces, validation and the return are omitted). Duplicates the written
* string and stores it in pdev->driver_override.
*/
static ssize_t driver_override_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
struct pci_dev *pdev = to_pci_dev(dev);
driver_override = kstrndup(buf, count, GFP_KERNEL);
pdev->driver_override = driver_override;
如何匹配?
前面多次提到設備和驅動進行匹配,究竟如何匹配呢?
先看一下用來表示一個pci設備的結構體pci_dev,其中如下幾個成員變量表示此pci設備的類型,一般vendor和device就足夠,vendor表示此設備是哪個廠商的,device表示此設備的類型。
/* Excerpt of struct pci_dev: the identification fields compared during driver matching. */
struct pci_dev {
...
unsigned short vendor;
unsigned short device;
unsigned short subsystem_vendor;
unsigned short subsystem_device;
unsigned int class; /* 3 bytes: (base,sub,prog-if) */
...
}
再看一下用來表示設備驅動的pci_driver,其中id_table和dynids用來保存此驅動支持的設備類型,前者是靜態值,后者可以通過驅動目錄下的new_id動態添加。設備類型使用pci_device_id結構體來表示,其成員變量也是vendor,device等信息,和pci_dev中的信息是一樣的,所以可以使用這幾個字段進行匹配。
/* One id entry describing a device, or a set of devices via PCI_ANY_ID, that a driver supports. */
struct pci_device_id {
__u32 vendor, device; /* Vendor and device ID or PCI_ANY_ID*/
__u32 subvendor, subdevice; /* Subsystem ID's or PCI_ANY_ID */
__u32 class, class_mask; /* (class,subclass,prog-if) triplet */
kernel_ulong_t driver_data; /* Data private to the driver */
};
/* Excerpt of struct pci_driver: the static id table and the dynamic id list. */
struct pci_driver {
struct pci_device_id *id_table
struct pci_dynids dynids;
...
}
最終使用函數pci_match_device進行驅動和設備的匹配。
/* Wildcard id: vendor, device, subvendor and subdevice are all PCI_ANY_ID. */
static const struct pci_device_id pci_device_id_any = {
.vendor = PCI_ANY_ID,
.device = PCI_ANY_ID,
.subvendor = PCI_ANY_ID,
.subdevice = PCI_ANY_ID,
};
/*
* Decide whether drv can drive dev; returns the matching id or NULL.
* (Abridged excerpt: braces and declarations are omitted, and the loop
* shown after the pci_match_id() call is that function's body, inlined.)
*/
static const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_dev *dev)
// If the device has driver_override set, it may only bind to the driver named there;
// any other driver gets NULL straight away
/* When driver_override is set, only bind to the matching driver */
if (dev->driver_override && strcmp(dev->driver_override, drv->name))
return NULL;
// First try the driver's dynamic id list
/* Look at the dynamic ids first, before the static ones */
spin_lock(&drv->dynids.lock);
list_for_each_entry(dynid, &drv->dynids.list, node) {
if (pci_match_one_device(&dynid->id, dev)) {
found_id = &dynid->id;
break;
}
}
spin_unlock(&drv->dynids.lock);
// If nothing matched, fall back to the driver's static table
if (!found_id)
found_id = pci_match_id(drv->id_table, dev);
while (ids->vendor || ids->subvendor || ids->class_mask) {
if (pci_match_one_device(ids, dev))
return ids;
ids++;
}
// If there is still no match but a driver was forced via driver_override,
// treat it as a match and return pci_device_id_any
/* driver_override will always match, send a dummy id */
if (!found_id && dev->driver_override)
found_id = &pci_device_id_any;
return found_id;
/*
 * The actual matching rule: compare one id entry against a device.
 *
 * @id:  id entry to test; a field equal to PCI_ANY_ID matches anything
 * @dev: device being matched
 *
 * Returns @id when vendor, device, subsystem vendor and subsystem device
 * all match (or are wildcards) and the class bits selected by
 * id->class_mask are equal; returns NULL otherwise.
 */
static inline const struct pci_device_id *
pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
	if (id->vendor != PCI_ANY_ID && id->vendor != dev->vendor)
		return NULL;
	if (id->device != PCI_ANY_ID && id->device != dev->device)
		return NULL;
	if (id->subvendor != PCI_ANY_ID && id->subvendor != dev->subsystem_vendor)
		return NULL;
	if (id->subdevice != PCI_ANY_ID && id->subdevice != dev->subsystem_device)
		return NULL;
	/* Any differing bit inside class_mask is a mismatch. */
	if ((id->class ^ dev->class) & id->class_mask)
		return NULL;
	return id;
}
綁定到 igb_uio 驅動
網卡如何綁定到igb_uio驅動呢?這里拿DPDK提供的腳本文件dpdk-devbind.py中的函數bind_one進行分析。
def bind_one(dev_id, driver, force):
    '''Bind the device given by "dev_id" to the driver "driver". If the device
    is already bound to a different driver, it will be unbound first'''
    # NOTE(review): the pasted snippet had lost all indentation; the nesting
    # below is reconstructed from the syntax (if/else/try/except pairing) and
    # should be confirmed against the upstream dpdk-devbind.py.
    dev = devices[dev_id]
    saved_driver = None  # used to rollback any unbind in case of failure

    # If the NIC is already bound to a driver, check whether it is the
    # requested one: if so, return; otherwise unbind it from the old driver
    # first. (Per the article, unbind_one does this by writing the NIC's PCI
    # address to the old driver's unbind file.)
    # unbind any existing drivers we don't want
    if has_driver(dev_id):
        if dev["Driver_str"] == driver:
            print("%s already bound to driver %s, skipping\n"
                  % (dev_id, driver))
            return
        else:
            saved_driver = dev["Driver_str"]
            unbind_one(dev_id, force)
            dev["Driver_str"] = ""  # clear driver string

    # The way to bind depends on the kernel version.
    # On kernels >= 3.15, first write the driver name into the NIC's
    # driver_override file to select the driver.
    # On kernels < 3.15, write the NIC's vendor and device id into the
    # driver's new_id file instead.
    # Why not use new_id on >= 3.15? Because on newer kernels writing new_id
    # not only adds the id to the driver's dynamic list, it also walks all
    # devices and binds every device of that type to the driver - awkward if
    # only one NIC was meant to be bound.
    # For kernels >= 3.15 driver_override can be used to specify the driver
    # for a device rather than relying on the driver to provide a positive
    # match of the device.  The existing process of looking up
    # the vendor and device ID, adding them to the driver new_id,
    # will erroneously bind other devices too which has the additional burden
    # of unbinding those devices
    if driver in dpdk_drivers:
        filename = "/sys/bus/pci/devices/%s/driver_override" % dev_id
        if os.path.exists(filename):
            try:
                f = open(filename, "w")
            except:
                print("Error: bind failed for %s - Cannot open %s"
                      % (dev_id, filename))
                return
            try:
                f.write("%s" % driver)
                f.close()
            except:
                print("Error: bind failed for %s - Cannot write driver %s to "
                      "PCI ID " % (dev_id, driver))
                return
        # For kernels < 3.15 use new_id to add PCI id's to the driver
        else:
            filename = "/sys/bus/pci/drivers/%s/new_id" % driver
            try:
                f = open(filename, "w")
            except:
                print("Error: bind failed for %s - Cannot open %s"
                      % (dev_id, filename))
                return
            try:
                # Convert Device and Vendor Id to int to write to new_id
                f.write("%04x %04x" % (int(dev["Vendor"],16),
                        int(dev["Device"], 16)))
                f.close()
            except:
                print("Error: bind failed for %s - Cannot write new PCI ID to "
                      "driver %s" % (dev_id, driver))
                return

    # Step two: write the NIC's PCI address into the driver's
    # /sys/bus/pci/drivers/%s/bind file, which binds the NIC to the driver.
    # do the bind by writing to /sys
    filename = "/sys/bus/pci/drivers/%s/bind" % driver
    try:
        f = open(filename, "a")
    except:
        print("Error: bind failed for %s - Cannot open %s"
              % (dev_id, filename))
        if saved_driver is not None:  # restore any previous driver
            bind_one(dev_id, saved_driver, force)
        return
    try:
        f.write(dev_id)
        f.close()
    except:
        # for some reason, closing dev_id after adding a new PCI ID to new_id
        # results in IOError. however, if the device was successfully bound,
        # we don't care for any errors and can safely ignore IOError
        tmp = get_pci_device_details(dev_id, True)
        if "Driver_str" in tmp and tmp["Driver_str"] == driver:
            return
        print("Error: bind failed for %s - Cannot bind to driver %s"
              % (dev_id, driver))
        if saved_driver is not None:  # restore any previous driver
            bind_one(dev_id, saved_driver, force)
        return

    # When the driver_override file exists (kernels >= 3.15), also clear it
    # so that the device can later be bound to other drivers.
    # For kernels > 3.15 driver_override is used to bind a device to a driver.
    # Before unbinding it, overwrite driver_override with empty string so that
    # the device can be bound to any other driver
    filename = "/sys/bus/pci/devices/%s/driver_override" % dev_id
    if os.path.exists(filename):
        try:
            f = open(filename, "w")
        except:
            print("Error: unbind failed for %s - Cannot open %s"
                  % (dev_id, filename))
            sys.exit(1)
        try:
            f.write("\00")
            f.close()
        except:
            print("Error: unbind failed for %s - Cannot open %s"
                  % (dev_id, filename))
            sys.exit(1)
igb_uio驅動的id_table為空,則在加載此驅動時,是不會匹配到任何設備的。
/* igb_uio's pci_driver: note the NULL id_table. */
static struct pci_driver igbuio_pci_driver = {
.name = "igb_uio",
.id_table = NULL, // The id_table of the drivers DPDK uses (igb_uio, vfio-pci, ...) is empty by default
.probe = igbuio_pci_probe,
.remove = igbuio_pci_remove,
};
經過上面的分析,有三種方法可以將網卡綁定到驅動igb_uio
a. 如果kernel版本大于等于3.15,先向網卡的文件 /sys/bus/pci/devices/'pci address'/driver_override 寫入驅動名字igb_uio,再向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/bind寫入網卡的pci地址即可。
b. 如果kernel版本大于等于3.15,向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/new_id寫入網卡的vendor和device id,則會自動將所有此類型并且沒有綁定到任何驅動的網卡綁定到igb_uio。
c. 如果kernel版本小于3.15,先向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/new_id寫入網卡的vendor和device id,再向驅動igb_uio的文件 /sys/bus/pci/drivers/igb_uio/bind寫入網卡的pci地址即可。注意低版本的kernel,在向new_id寫入值時,只會將設備類型添加到此驅動的動態鏈表,而不會自動探測設備。
igb_uio probe
經過前面的分析網卡綁定到了igb_uio驅動后,會調用驅動的probe函數igbuio_pci_probe,主要做了如下幾個事情:
a. 調用pci_enable_device使能pci設備
b. 設置DMA mask
c. 填充struct uio_info信息,注冊uio設備
d. 注冊中斷處理函數
/*
* Probe callback of igb_uio (abridged excerpt: braces, error handling and
* the return are omitted; the two statements after pci_set_drvdata() show
* that helper's body inline).
*/
static int
igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
struct rte_uio_pci_dev *udev;
dma_addr_t map_dma_addr;
void *map_addr;
udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
// Enable the PCI device
/*
* enable device: ask low-level code to enable I/O and
* memory
*/
pci_enable_device(dev);
/* enable bus mastering on the device */
pci_set_master(dev);
// Save the device's memory BARs in struct uio_info->mem
// and its I/O BARs in struct uio_info->port
/* remap IO memory */
igbuio_setup_bars(dev, &udev->info);
/* set 64-bit DMA mask */
pci_set_dma_mask(dev, DMA_BIT_MASK(64));
pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
// Fill in the remaining struct uio_info fields
/* fill uio infos */
udev->info.name = "igb_uio";
udev->info.version = "0.1";
udev->info.irqcontrol = igbuio_pci_irqcontrol;
udev->info.open = igbuio_pci_open;
udev->info.release = igbuio_pci_release;
udev->info.priv = udev;
udev->pdev = dev;
// Create the /sys/bus/pci/devices/'pci address'/max_vf file;
// writing it creates VFs, which shows that a NIC bound to igb_uio can
// still create VFs.
sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
// Register with uio: this creates the /dev/uiox character device
// and the directory /sys/bus/pci/devices/'pci address'/uio/uiox
/* register uio driver */
uio_register_device(&dev->dev, &udev->info);
// Store the struct rte_uio_pci_dev in dev->driver_data
pci_set_drvdata(dev, udev);
dev_set_drvdata(&pdev->dev, data);
dev->driver_data = data;
宏uio_register_device用來注冊uio設備。
/* use a define to avoid include chaining to get THIS_MODULE */
#define uio_register_device(parent, info) \
__uio_register_device(THIS_MODULE, parent, info)
/*
* Register a uio device for the given uio_info (abridged excerpt: braces,
* error handling and the return are omitted).
*/
int __uio_register_device(struct module *owner,
struct device *parent,
struct uio_info *info)
// Build a uio_device from the uio_info
struct uio_device *idev;
idev = devm_kzalloc(parent, sizeof(*idev), GFP_KERNEL);
idev->owner = owner;
idev->info = info;
init_waitqueue_head(&idev->wait);
atomic_set(&idev->event, 0);
// Allocate the lowest unused id and store it in idev->minor
uio_get_minor(idev);
// Create the character device /dev/uiox
idev->dev = device_create(&uio_class, parent, MKDEV(uio_major, idev->minor), idev, "uio%d", idev->minor);
// Under /sys/class/uio/uiox/ create the maps directory; based on struct uio_info->mem and port,
// mapx and portx directories are created, each holding that region's info: start address, name, offset and size.
// According to the article, user space can mmap the files under mapx to operate the NIC registers
// (NOTE(review): confirm - this excerpt does not show how the mapping is exposed).
// DPDK does not use this; it mmaps /sys/bus/pci/devices/'pci address'/resource0 directly.
uio_dev_add_attributes(idev);
info->uio_dev = idev;
// Register the interrupt. In newer DPDK versions info->irq is not set when the uio device is registered,
// so the interrupt is instead requested in igbuio_pci_open when user space opens /dev/uiox.
if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
devm_request_irq(idev->dev, info->irq, uio_interrupt, info->irq_flags, info->name, idev);
}
簡單總結一下,igb_uio是DPDK使用網卡的一個通用驅動,不只intel網卡可以用,其他廠商的網卡也可以用(有一個例外,mellanox的網卡不用綁定到igb_uio就能被DPDK使用),因為它只使能了pci設備,注冊uio,和注冊中斷處理函數,這些工作是不區分網卡類型的。
加載igb_uio時,不會自動探測pci設備,而是需要寫sys文件將設備綁定到igb_uio。
igb_uio依賴uio驅動,注冊uio設備后,會生成/dev/uiox,和網卡一一對應,用戶態可以poll /dev/uiox監聽中斷是否到來。
同時uio設備還會將網卡的BAR地址通過sys文件系統暴露出去,用戶態可以mmap sys文件后操作網卡寄存器。但是DPDK沒有采用這種方式,而是直接mmap網卡自身暴露出去的sys文件 /sys/bus/pci/devices/'pci address'/resource0。
參考
https://www.cnblogs.com/jungle1996/p/12398915.html
https://www.cnblogs.com/jungle1996/p/12452636.html