QEMU源码全解析 —— virtio(20)

这篇具有很好参考价值的文章主要介绍了QEMU源码全解析 —— virtio(20)。希望对大家有所帮助。如果存在错误或未考虑完全的地方,请大家不吝赐教,您也可以点击"举报违法"按钮提交疑问。

接前一篇文章:

上回书重点解析了virtio_pci_modern_probe函数。再来回顾一下其中相关的数据结构:

  • struct virtio_pci_device

struct virtio_pci_device的定义在Linux内核源码/drivers/virtio/virtio_pci_common.h中,如下:

/* Our device structure */
struct virtio_pci_device {
	struct virtio_device vdev;
	struct pci_dev *pci_dev;
	union {
		struct virtio_pci_legacy_device ldev;
		struct virtio_pci_modern_device mdev;
	};
	bool is_legacy;

	/* Where to read and clear interrupt */
	u8 __iomem *isr;

	/* a list of queues so we can dispatch IRQs */
	spinlock_t lock;
	struct list_head virtqueues;

	/* array of all queues for house-keeping */
	struct virtio_pci_vq_info **vqs;

	/* MSI-X support */
	int msix_enabled;
	int intx_enabled;
	cpumask_var_t *msix_affinity_masks;
	/* Name strings for interrupts. This size should be enough,
	 * and I'm too lazy to allocate each name separately. */
	char (*msix_names)[256];
	/* Number of available vectors */
	unsigned int msix_vectors;
	/* Vectors allocated, excluding per-vq vectors if any */
	unsigned int msix_used_vectors;

	/* Whether we have vector per vq */
	bool per_vq_vectors;

	struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev,
				      struct virtio_pci_vq_info *info,
				      unsigned int idx,
				      void (*callback)(struct virtqueue *vq),
				      const char *name,
				      bool ctx,
				      u16 msix_vec);
	void (*del_vq)(struct virtio_pci_vq_info *info);

	u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
};

virtio_pci_modern_probe执行完成后,相关数据结构如下图所示:

QEMU源码全解析 —— virtio(20),KVM,QEMU,QEMU,KVM

回到virtio_pci_probe函数。在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:

static int virtio_pci_probe(struct pci_dev *pci_dev,
			    const struct pci_device_id *id)
{
	struct virtio_pci_device *vp_dev, *reg_dev = NULL;
	int rc;

	/* allocate our structure and fill it out */
	vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
	if (!vp_dev)
		return -ENOMEM;

	pci_set_drvdata(pci_dev, vp_dev);
	vp_dev->vdev.dev.parent = &pci_dev->dev;
	vp_dev->vdev.dev.release = virtio_pci_release_dev;
	vp_dev->pci_dev = pci_dev;
	INIT_LIST_HEAD(&vp_dev->virtqueues);
	spin_lock_init(&vp_dev->lock);

	/* enable the device */
	rc = pci_enable_device(pci_dev);
	if (rc)
		goto err_enable_device;

	if (force_legacy) {
		rc = virtio_pci_legacy_probe(vp_dev);
		/* Also try modern mode if we can't map BAR0 (no IO space). */
		if (rc == -ENODEV || rc == -ENOMEM)
			rc = virtio_pci_modern_probe(vp_dev);
		if (rc)
			goto err_probe;
	} else {
		rc = virtio_pci_modern_probe(vp_dev);
		if (rc == -ENODEV)
			rc = virtio_pci_legacy_probe(vp_dev);
		if (rc)
			goto err_probe;
	}

	pci_set_master(pci_dev);

	rc = register_virtio_device(&vp_dev->vdev);
	reg_dev = vp_dev;
	if (rc)
		goto err_register;

	return 0;

err_register:
	if (vp_dev->is_legacy)
		virtio_pci_legacy_remove(vp_dev);
	else
		virtio_pci_modern_remove(vp_dev);
err_probe:
	pci_disable_device(pci_dev);
err_enable_device:
	if (reg_dev)
		put_device(&vp_dev->vdev.dev);
	else
		kfree(vp_dev);
	return rc;
}

接QEMU源码全解析 —— virtio(18)中的内容,前文书讲到了virtio_pci_probe函数的第5步,

“(5)调用virtio_pci_legacy_probe或者virtio_pci_modern_probe函数来初始化该PCI设备对应的virtio设备。”,继续往下进行。

(6)virtio_pci_probe函数在调用virtio_pci_modern_probe函数之后,接下来会调用register_virtio_device。代码片段如下:

	rc = register_virtio_device(&vp_dev->vdev);
	reg_dev = vp_dev;
	if (rc)
		goto err_register;

register_virtio_device函数在Linux内核源码/drivers/virtio/virtio.c中,代码如下:

/**
 * register_virtio_device - register virtio device
 * @dev        : virtio device to be registered
 *
 * On error, the caller must call put_device on &@dev->dev (and not kfree),
 * as another code path may have obtained a reference to @dev.
 *
 * Returns: 0 on suceess, -error on failure
 */
int register_virtio_device(struct virtio_device *dev)
{
	int err;

	dev->dev.bus = &virtio_bus;
	device_initialize(&dev->dev);

	/* Assign a unique device index and hence name. */
	err = ida_alloc(&virtio_index_ida, GFP_KERNEL);
	if (err < 0)
		goto out;

	dev->index = err;
	err = dev_set_name(&dev->dev, "virtio%u", dev->index);
	if (err)
		goto out_ida_remove;

	err = virtio_device_of_init(dev);
	if (err)
		goto out_ida_remove;

	spin_lock_init(&dev->config_lock);
	dev->config_enabled = false;
	dev->config_change_pending = false;

	INIT_LIST_HEAD(&dev->vqs);
	spin_lock_init(&dev->vqs_list_lock);

	/* We always start by resetting the device, in case a previous
	 * driver messed it up.  This also tests that code path a little. */
	virtio_reset_device(dev);

	/* Acknowledge that we've seen the device. */
	virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);

	/*
	 * device_add() causes the bus infrastructure to look for a matching
	 * driver.
	 */
	err = device_add(&dev->dev);
	if (err)
		goto out_of_node_put;

	return 0;

out_of_node_put:
	of_node_put(dev->dev.of_node);
out_ida_remove:
	ida_free(&virtio_index_ida, dev->index);
out:
	virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
	return err;
}
EXPORT_SYMBOL_GPL(register_virtio_device);

前文已提到,vp_dev->vdev的类型为struct virtio_device,而传给register_virtio_device函数的实参为vp_dev->vdev的地址,即&vp_dev->vdev。从函数名以及参数类型就能看出,register_virtio_device函数的作用是将一个virtio device注册到系统中。具体步骤如下:

(1)设置virtio设备的Bus为virtio_bus。代码片段如下:

    dev->dev.bus = &virtio_bus;

virtio_bus在系统初始化的时候会注册到系统中。

virtio_bus在Linux内核源码/drivers/virtio/virtio.c中初始化,代码如下:

static struct bus_type virtio_bus = {
	.name  = "virtio",
	.match = virtio_dev_match,
	.dev_groups = virtio_dev_groups,
	.uevent = virtio_uevent,
	.probe = virtio_dev_probe,
	.remove = virtio_dev_remove,
};


int register_virtio_driver(struct virtio_driver *driver)
{
	/* Catch this early. */
	BUG_ON(driver->feature_table_size && !driver->feature_table);
	driver->driver.bus = &virtio_bus;
	return driver_register(&driver->driver);
}
EXPORT_SYMBOL_GPL(register_virtio_driver);

在系统初始化的时候,通过register_virtio_driver函数注册到系统中。

(2)设置virtio设备的名字为类似"virtio0"、"virtio1"的字符串。代码片段如下:

    /* Assign a unique device index and hence name. */
	err = ida_alloc(&virtio_index_ida, GFP_KERNEL);
	if (err < 0)
		goto out;

	dev->index = err;
	err = dev_set_name(&dev->dev, "virtio%u", dev->index);
	if (err)
		goto out_ida_remove;

dev_set_name函数在Linux内核源码/drivers/base/core.c中,代码如下:

/**
 * dev_set_name - set a device name
 * @dev: device
 * @fmt: format string for the device's name
 */
int dev_set_name(struct device *dev, const char *fmt, ...)
{
	va_list vargs;
	int err;

	va_start(vargs, fmt);
	err = kobject_set_name_vargs(&dev->kobj, fmt, vargs);
	va_end(vargs);
	return err;
}
EXPORT_SYMBOL_GPL(dev_set_name);

(3)然后调用virtio_reset_device函数重置设备。代码片段如下:

    /* We always start by resetting the device, in case a previous
	 * driver messed it up.  This also tests that code path a little. */
	virtio_reset_device(dev);

(4)最后,调用device_add函数,将设备注册到系统中。代码片段如下:

    /*
	 * device_add() causes the bus infrastructure to look for a matching
	 * driver.
	 */
	err = device_add(&dev->dev);
	if (err)
		goto out_of_node_put;

这里,老版本代码中是调用的是device_register函数。device_register函数跟设备驱动相关性较大,在此简单介绍一下其作用。

device_register函数在Linux内核源码/drivers/base/core.c中,代码如下:

/**
 * device_register - register a device with the system.
 * @dev: pointer to the device structure
 *
 * This happens in two clean steps - initialize the device
 * and add it to the system. The two steps can be called
 * separately, but this is the easiest and most common.
 * I.e. you should only call the two helpers separately if
 * have a clearly defined need to use and refcount the device
 * before it is added to the hierarchy.
 *
 * For more information, see the kerneldoc for device_initialize()
 * and device_add().
 *
 * NOTE: _Never_ directly free @dev after calling this function, even
 * if it returned an error! Always use put_device() to give up the
 * reference initialized in this function instead.
 */
int device_register(struct device *dev)
{
	device_initialize(dev);
	return device_add(dev);
}
EXPORT_SYMBOL_GPL(device_register);

device_register函数向系统注册一个设备。其分为两个简单的步骤——初始化设备(device_initialize(dev))并将其添加到系统中(device_add(dev))。这两个步骤可以分别调用,但放在一起即使用device_register函数是最简单和最常见的。例如,如果有明确的需求在其添加到层级之前使用和重新计数设备,那么应该分别独立地调用这两个助手(函数)。

从此处的代码就可以知道,老版本的内核代码中确实是直接调用了device_register函数,而新版本内核代码在此处则是在register_virtio_device函数的前边先调用了device_initialize(&dev->dev),而后在这里调用了device_add(&dev->dev)。即采用了分开调用的方式。

device_register函数会调用device_add函数,将设备加到系统中,并且会发送一个uevent消息到用户空间,这个uevent消息中包含了virtio设备的vendor id、device id。 udev接收到此消息之后,会加载virtio设备对应的驱动。然后,device_add函数会调用bus_probe_device函数,最终调用到Bus的probe函数和设备的probe函数,也就是virtio_dev_probe函数和virtballoon_probe函数。

device_add函数也在Linux内核源码/drivers/base/core.c中,就在device_register函数上边,代码如下:

/**
 * device_add - add device to device hierarchy.
 * @dev: device.
 *
 * This is part 2 of device_register(), though may be called
 * separately _iff_ device_initialize() has been called separately.
 *
 * This adds @dev to the kobject hierarchy via kobject_add(), adds it
 * to the global and sibling lists for the device, then
 * adds it to the other relevant subsystems of the driver model.
 *
 * Do not call this routine or device_register() more than once for
 * any device structure.  The driver model core is not designed to work
 * with devices that get unregistered and then spring back to life.
 * (Among other things, it's very hard to guarantee that all references
 * to the previous incarnation of @dev have been dropped.)  Allocate
 * and register a fresh new struct device instead.
 *
 * NOTE: _Never_ directly free @dev after calling this function, even
 * if it returned an error! Always use put_device() to give up your
 * reference instead.
 *
 * Rule of thumb is: if device_add() succeeds, you should call
 * device_del() when you want to get rid of it. If device_add() has
 * *not* succeeded, use *only* put_device() to drop the reference
 * count.
 */
int device_add(struct device *dev)
{
	struct subsys_private *sp;
	struct device *parent;
	struct kobject *kobj;
	struct class_interface *class_intf;
	int error = -EINVAL;
	struct kobject *glue_dir = NULL;

	dev = get_device(dev);
	if (!dev)
		goto done;

	if (!dev->p) {
		error = device_private_init(dev);
		if (error)
			goto done;
	}

	/*
	 * for statically allocated devices, which should all be converted
	 * some day, we need to initialize the name. We prevent reading back
	 * the name, and force the use of dev_name()
	 */
	if (dev->init_name) {
		error = dev_set_name(dev, "%s", dev->init_name);
		dev->init_name = NULL;
	}

	if (dev_name(dev))
		error = 0;
	/* subsystems can specify simple device enumeration */
	else if (dev->bus && dev->bus->dev_name)
		error = dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id);
	else
		error = -EINVAL;
	if (error)
		goto name_error;

	pr_debug("device: '%s': %s\n", dev_name(dev), __func__);

	parent = get_device(dev->parent);
	kobj = get_device_parent(dev, parent);
	if (IS_ERR(kobj)) {
		error = PTR_ERR(kobj);
		goto parent_error;
	}
	if (kobj)
		dev->kobj.parent = kobj;

	/* use parent numa_node */
	if (parent && (dev_to_node(dev) == NUMA_NO_NODE))
		set_dev_node(dev, dev_to_node(parent));

	/* first, register with generic layer. */
	/* we require the name to be set before, and pass NULL */
	error = kobject_add(&dev->kobj, dev->kobj.parent, NULL);
	if (error) {
		glue_dir = kobj;
		goto Error;
	}

	/* notify platform of device entry */
	device_platform_notify(dev);

	error = device_create_file(dev, &dev_attr_uevent);
	if (error)
		goto attrError;

	error = device_add_class_symlinks(dev);
	if (error)
		goto SymlinkError;
	error = device_add_attrs(dev);
	if (error)
		goto AttrsError;
	error = bus_add_device(dev);
	if (error)
		goto BusError;
	error = dpm_sysfs_add(dev);
	if (error)
		goto DPMError;
	device_pm_add(dev);

	if (MAJOR(dev->devt)) {
		error = device_create_file(dev, &dev_attr_dev);
		if (error)
			goto DevAttrError;

		error = device_create_sys_dev_entry(dev);
		if (error)
			goto SysEntryError;

		devtmpfs_create_node(dev);
	}

	/* Notify clients of device addition.  This call must come
	 * after dpm_sysfs_add() and before kobject_uevent().
	 */
	bus_notify(dev, BUS_NOTIFY_ADD_DEVICE);
	kobject_uevent(&dev->kobj, KOBJ_ADD);

	/*
	 * Check if any of the other devices (consumers) have been waiting for
	 * this device (supplier) to be added so that they can create a device
	 * link to it.
	 *
	 * This needs to happen after device_pm_add() because device_link_add()
	 * requires the supplier be registered before it's called.
	 *
	 * But this also needs to happen before bus_probe_device() to make sure
	 * waiting consumers can link to it before the driver is bound to the
	 * device and the driver sync_state callback is called for this device.
	 */
	if (dev->fwnode && !dev->fwnode->dev) {
		dev->fwnode->dev = dev;
		fw_devlink_link_device(dev);
	}

	bus_probe_device(dev);

	/*
	 * If all driver registration is done and a newly added device doesn't
	 * match with any driver, don't block its consumers from probing in
	 * case the consumer device is able to operate without this supplier.
	 */
	if (dev->fwnode && fw_devlink_drv_reg_done && !dev->can_match)
		fw_devlink_unblock_consumers(dev);

	if (parent)
		klist_add_tail(&dev->p->knode_parent,
			       &parent->p->klist_children);

	sp = class_to_subsys(dev->class);
	if (sp) {
		mutex_lock(&sp->mutex);
		/* tie the class to the device */
		klist_add_tail(&dev->p->knode_class, &sp->klist_devices);

		/* notify any interfaces that the device is here */
		list_for_each_entry(class_intf, &sp->interfaces, node)
			if (class_intf->add_dev)
				class_intf->add_dev(dev);
		mutex_unlock(&sp->mutex);
		subsys_put(sp);
	}
done:
	put_device(dev);
	return error;
 SysEntryError:
	if (MAJOR(dev->devt))
		device_remove_file(dev, &dev_attr_dev);
 DevAttrError:
	device_pm_remove(dev);
	dpm_sysfs_remove(dev);
 DPMError:
	dev->driver = NULL;
	bus_remove_device(dev);
 BusError:
	device_remove_attrs(dev);
 AttrsError:
	device_remove_class_symlinks(dev);
 SymlinkError:
	device_remove_file(dev, &dev_attr_uevent);
 attrError:
	device_platform_notify_remove(dev);
	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
	glue_dir = get_glue_dir(dev);
	kobject_del(&dev->kobj);
 Error:
	cleanup_glue_dir(dev, glue_dir);
parent_error:
	put_device(parent);
name_error:
	kfree(dev->p);
	dev->p = NULL;
	goto done;
}
EXPORT_SYMBOL_GPL(device_add);

其中的代码片段:

    /* Notify clients of device addition.  This call must come
	 * after dpm_sysfs_add() and before kobject_uevent().
	 */
	bus_notify(dev, BUS_NOTIFY_ADD_DEVICE);
	kobject_uevent(&dev->kobj, KOBJ_ADD);

    bus_probe_device(dev);

就是上边所讲到的:

device_register函数会调用device_add函数,将设备加到系统中,并且会发送一个uevent消息到用户空间,这个uevent消息中包含了virtio设备的vendor id、device id。 udev接收到此消息之后,会加载virtio设备对应的驱动。

然后,device_add函数会调用bus_probe_device函数,最终调用到Bus的probe函数和设备的probe函数,也就是virtio_dev_probe函数和virtballoon_probe函数。

欲知后事如何,且看下回分解。文章来源地址https://www.toymoban.com/news/detail-833246.html

到了这里,关于QEMU源码全解析 —— virtio(20)的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处: 如若内容造成侵权/违法违规/事实不符,请点击违法举报进行投诉反馈,一经查实,立即删除!

领支付宝红包 赞助服务器费用

相关文章

  • QEMU源码全解析23 —— QOM介绍(12)

    接前一篇文章:QEMU源码全解析22 —— QOM介绍(11) 本文内容参考: 《趣谈Linux操作系统》 —— 刘超,极客时间 《QEMU/KVM》源码解析与应用 —— 李强,机械工业出版社 特此致谢! 上一回分析了QEMU对象的构造和初始化的函数调用流程,本回结合实例对此流程进行深入介绍和

    2024年02月14日
    浏览(39)
  • QEMU源码全解析 —— PCI设备模拟(6)

    接前一篇文章: 上一回讲到了pci_edu_realize函数中的pci_register_bar函数,本回对于其进行详细解析。 再次贴出pci_register_bar函数源码,在hw/pci/pci.c中,代码如下: (1)首先根据region_num找到PCIDevice-io_regions数组中对应的项。PCI设备的MMIO存放在PCIIORegion结构体中,结构体中保存了

    2024年01月16日
    浏览(36)
  • QEMU源码全解析 —— PCI设备模拟(7)

    接前一篇文章: 上一回讲解了pci_edu_realize函数中的pci_register_bar函数,本回开始对于edu设备的MMIO读写函数进行解析。 操作系统与PCI设备交互的主要方式是PIO和MMIO。MMIO虽然是一段内存,但是其没有EPT映射,在虚拟机访问设备的MMIO时,会产生VM Exit;KVM识别此MMIO访问并且将该访

    2024年01月22日
    浏览(31)
  • qemu-kvm IO优化

    主要是磁盘方面的IO资源优化  四个方面去着手优化: 1. 磁盘的类型有IDE 、SATA 以及virtio 三种   建议使用 virtio 2. 磁盘缓存模式   目前KVM这块支持5种磁盘缓存模式,writethrough、writeback、none、directsync或者unsafe。一般用到的就是前面3种,后面两种几乎不会使用。   writethrou

    2023年04月08日
    浏览(31)
  • kvm qemu虚拟机的创建和启动

    qemu-img create -f qcow2 win1021H1.qcow2 10G sudo qemu-system-x86_64 -enable-kvm -m 8G -smp 4 -boot once=d -cdrom ./iso/cn_windows_7_enterprise_with_sp1_x64_dvd_u_677685.iso -hda ./win7_x64.qcow2 -vnc :1 -usb -usbdevice tablet 如果没有指定-hda ./win7.qcow2,则在安装系统的时候没有磁盘,如下图片是增加了之后才有的磁盘 默认不

    2024年02月12日
    浏览(50)
  • 巨页内存与Qemu/KVM虚拟化内存优化

    在虚拟化环境中,需要对虚拟机的优化,其中包括在某些情况下利用巨页内存进行内存的优化以提高虚拟机性能。那么什么是巨页内存?巨页内存有什么好处?Qemu/KVM虚拟化环境下如何使用巨页内存?本文将对这几个问题进行阐述。 对于内存管理,大多数现代操作系统都采用

    2024年02月07日
    浏览(45)
  • QEMU-KVM网络特性协商与虚拟机通信

    深入了解QEMU-KVM在启动虚拟机时如何通过代理进行网络前后端特性协商,包括与DPDK vhost-user和guest virtio-net驱动的交互。

    2024年02月01日
    浏览(30)
  • python可视化管理kvm虚拟机(使用libvirt、qemu连接虚拟机)

    对于云计算的实践,在虚拟机上面布置kvm虚拟机后使用python调用libvirt库进行远程可视化管理,实现输出虚拟机信息、新建虚拟机、删除虚拟机等功能,并在虚拟机集群上面运行mpi代码。 用pycharm专业版连接kvm的步骤见本文章。 mpi代码见本文章。

    2024年02月16日
    浏览(39)
  • qemu+kvm安装银河麒麟V10SP1 arm64 虚拟机

    系统镜像 Kylin-Desktop-V10-SP1-Release-2107-arm64.iso QEMU_EFI.fd(下载地址 http://releases.linaro.org/components/kernel/uefi-linaro/16.02/release/qemu64/QEMU_EFI.fd) 注:麒麟系统要求磁盘大小在50G以上. 参数说明: -m 4096 指定内存大小,单位MB -cpu cortex-a72 CPU 型号 -smp 8,cores=8,threads=1,sockets=1 1颗CPU,8核8线

    2024年01月21日
    浏览(103)
  • ubuntu18.04下pass-through直通realteck PCI设备到qemu-kvm虚拟机实践

    设备直通是一种虚拟化资源分配方式,通过将物理设备直通给虚拟机环境,达到虚拟机可以直接访问物理设备的目的,直通功能对设备的要求不高,不需要设备支持PF/VF,目前市面上的显卡/网卡一般都支持直通。典型场景比如有两块显卡,一块主机用,另一块虚拟机用,主板

    2024年02月03日
    浏览(51)

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

博客赞助

微信扫一扫打赏

请作者喝杯咖啡吧~博客赞助

支付宝扫一扫领取红包,优惠每天领

二维码1

领取红包

二维码2

领红包