4.3. Configure Two-Port OVS-DPDK Data Plane Bonding with VLAN Tunnelling
*Single OVS-DPDK data plane, single OVS bridge, two ports*
This section describes how to configure and deploy OVS-DPDK with the two data plane ports bonded into a single OVS-DPDK bond, and how to configure control-plane Linux bonding for the OpenStack environment.
Network topology (diagram not reproduced here):
OpenStack uses a number of logical networks, which can be roughly divided into three planes: the PXE plane, the OpenStack management plane, and the service (data) plane.
4.3.1. Modify first-boot.yaml
- Add the additional resources.
resources:
  userdata:
    type: OS::Heat::MultipartMime
    properties:
      parts:
      - config: {get_resource: set_ovs_config}
      - config: {get_resource: set_dpdk_params}
      - config: {get_resource: install_tuned}
      - config: {get_resource: compute_kernel_args}
- OVS configuration.
  set_ovs_config:
    type: OS::Heat::SoftwareConfig
    properties:
      config:
        str_replace:
          template: |
            #!/bin/bash
            FORMAT=$COMPUTE_HOSTNAME_FORMAT
            if [[ -z $FORMAT ]] ; then
              FORMAT="compute" ;
            else
              # Assumption: only %index% and %stackname% are the variables in Host name format
              FORMAT=$(echo $FORMAT | sed 's/\%index\%//g' | sed 's/\%stackname\%//g') ;
            fi
            if [[ $(hostname) == *$FORMAT* ]] ; then
              # Pick whichever OVS service unit file exists on this image
              if [ -f /usr/lib/systemd/system/openvswitch-nonetwork.service ]; then
                ovs_service_path="/usr/lib/systemd/system/openvswitch-nonetwork.service"
              elif [ -f /usr/lib/systemd/system/ovs-vswitchd.service ]; then
                ovs_service_path="/usr/lib/systemd/system/ovs-vswitchd.service"
              fi
              # Run ovs-vswitchd with group qemu and group-writable runtime files
              # so that QEMU can access the vhost-user sockets
              grep -q "RuntimeDirectoryMode=.*" $ovs_service_path
              if [ "$?" -eq 0 ]; then
                sed -i 's/RuntimeDirectoryMode=.*/RuntimeDirectoryMode=0775/' $ovs_service_path
              else
                echo "RuntimeDirectoryMode=0775" >> $ovs_service_path
              fi
              grep -Fxq "Group=qemu" $ovs_service_path
              if [ ! "$?" -eq 0 ]; then
                echo "Group=qemu" >> $ovs_service_path
              fi
              grep -Fxq "UMask=0002" $ovs_service_path
              if [ ! "$?" -eq 0 ]; then
                echo "UMask=0002" >> $ovs_service_path
              fi
              ovs_ctl_path='/usr/share/openvswitch/scripts/ovs-ctl'
              grep -q "umask 0002 \&\& start_daemon \"\$OVS_VSWITCHD_PRIORITY\"" $ovs_ctl_path
              if [ ! "$?" -eq 0 ]; then
                sed -i 's/start_daemon \"\$OVS_VSWITCHD_PRIORITY.*/umask 0002 \&\& start_daemon \"$OVS_VSWITCHD_PRIORITY\" \"$OVS_VSWITCHD_WRAPPER\" \"$@\"/' $ovs_ctl_path
              fi
            fi
          params:
            $COMPUTE_HOSTNAME_FORMAT: {get_param: ComputeHostnameFormat}
- Set the DPDK parameters.
  set_dpdk_params:
    type: OS::Heat::SoftwareConfig
    properties:
      config:
        str_replace:
          template: |
            #!/bin/bash
            set -x
            # Convert a comma-separated CPU list into the hexadecimal bitmask
            # that OVS expects (e.g. "4,6" -> "50")
            get_mask()
            {
              local list=$1
              local mask=0
              declare -a bm
              max_idx=0
              for core in $(echo $list | sed 's/,/ /g')
              do
                index=$(($core/32))
                bm[$index]=0
                if [ $max_idx -lt $index ]; then
                  max_idx=$(($index))
                fi
              done
              # Zero every 32-bit word up to the highest one used
              for ((i=$max_idx;i>=0;i--));
              do
                bm[$i]=0
              done
              # Set one bit per core in the matching 32-bit word
              for core in $(echo $list | sed 's/,/ /g')
              do
                index=$(($core/32))
                temp=$((1<<$(($core % 32))))
                bm[$index]=$((${bm[$index]} | $temp))
              done
              # Print the highest word unpadded, lower words zero-padded to 8 hex digits
              printf -v mask "%x" "${bm[$max_idx]}"
              for ((i=$max_idx-1;i>=0;i--));
              do
                printf -v hex "%08x" "${bm[$i]}"
                mask+=$hex
              done
              printf "%s" "$mask"
            }
            FORMAT=$COMPUTE_HOSTNAME_FORMAT
            if [[ -z $FORMAT ]] ; then
              FORMAT="compute" ;
            else
              # Assumption: only %index% and %stackname% are the variables in Host name format
              FORMAT=$(echo $FORMAT | sed 's/\%index\%//g' | sed 's/\%stackname\%//g') ;
            fi
            if [[ $(hostname) == *$FORMAT* ]] ; then
              pmd_cpu_mask=$( get_mask $PMD_CORES )
              host_cpu_mask=$( get_mask $LCORE_LIST )
              socket_mem=$(echo $SOCKET_MEMORY | sed s/\'//g )
              ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true
              ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-socket-mem=$socket_mem
              ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask=$pmd_cpu_mask
              ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-lcore-mask=$host_cpu_mask
            fi
          params:
            $COMPUTE_HOSTNAME_FORMAT: {get_param: ComputeHostnameFormat}
            $LCORE_LIST: {get_param: HostCpusList}
            $PMD_CORES: {get_param: NeutronDpdkCoreList}
            $SOCKET_MEMORY: {get_param: NeutronDpdkSocketMemory}
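》 As a sanity check for get_mask (a minimal sketch; the expected value is worked out by hand from the bit positions, not taken from the guide), you can source the function above in a shell and convert the PMD core list used later in this section:

get_mask "4,6,20,22"
# cores 4,6,20,22 set bits 0x10, 0x40, 0x100000 and 0x400000,
# so this prints 500050 and first boot sets other_config:pmd-cpu-mask=500050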
- Set the tuned configuration for CPU affinity.
  install_tuned:
    type: OS::Heat::SoftwareConfig
    properties:
      config:
        str_replace:
          template: |
            #!/bin/bash
            FORMAT=$COMPUTE_HOSTNAME_FORMAT
            if [[ -z $FORMAT ]] ; then
              FORMAT="compute" ;
            else
              # Assumption: only %index% and %stackname% are the variables in Host name format
              FORMAT=$(echo $FORMAT | sed 's/\%index\%//g' | sed 's/\%stackname\%//g') ;
            fi
            if [[ $(hostname) == *$FORMAT* ]] ; then
              tuned_conf_path="/etc/tuned/cpu-partitioning-variables.conf"
              if [ -n "$TUNED_CORES" ]; then
                grep -q "^isolated_cores" $tuned_conf_path
                if [ "$?" -eq 0 ]; then
                  sed -i 's/^isolated_cores=.*/isolated_cores=$TUNED_CORES/' $tuned_conf_path
                else
                  echo "isolated_cores=$TUNED_CORES" >> $tuned_conf_path
                fi
                tuned-adm profile cpu-partitioning
              fi
            fi
          params:
            $COMPUTE_HOSTNAME_FORMAT: {get_param: ComputeHostnameFormat}
            $TUNED_CORES: {get_param: HostIsolatedCoreList}
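》 After the node boots, the profile can be verified with the standard tuned commands (not part of the template):

tuned-adm active
# expected: Current active profile: cpu-partitioning
grep ^isolated_cores /etc/tuned/cpu-partitioning-variables.conf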
- Set the kernel arguments.
  compute_kernel_args:
    type: OS::Heat::SoftwareConfig
    properties:
      config:
        str_replace:
          template: |
            #!/bin/bash
            FORMAT=$COMPUTE_HOSTNAME_FORMAT
            if [[ -z $FORMAT ]] ; then
              FORMAT="compute" ;
            else
              # Assumption: only %index% and %stackname% are the variables in Host name format
              FORMAT=$(echo $FORMAT | sed 's/\%index\%//g' | sed 's/\%stackname\%//g') ;
            fi
            if [[ $(hostname) == *$FORMAT* ]] ; then
              sed 's/^\(GRUB_CMDLINE_LINUX=".*\)"/\1 $KERNEL_ARGS isolcpus=$TUNED_CORES"/g' -i /etc/default/grub ;
              grub2-mkconfig -o /etc/grub2.cfg
              reboot
            fi
          params:
            $KERNEL_ARGS: {get_param: ComputeKernelArgs}
            $COMPUTE_HOSTNAME_FORMAT: {get_param: ComputeHostnameFormat}
            $TUNED_CORES: {get_param: HostIsolatedCoreList}
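》 For illustration only (assuming the ComputeKernelArgs and HostIsolatedCoreList values shown in section 4.3.3 below), after this script runs /etc/default/grub should contain a line similar to:

GRUB_CMDLINE_LINUX="... default_hugepagesz=1GB hugepagesz=1G hugepages=32 iommu=pt intel_iommu=on isolcpus=2,4,6,8,10,12,14,18,20,22,24,26,28,30"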
4.3.2. Modify post-install.yaml
- Set the tuned configuration to provide CPU affinity.
ExtraConfig:
  type: OS::Heat::SoftwareConfig
  properties:
    group: script
    config:
      str_replace:
        template: |
          #!/bin/bash
          set -x
          FORMAT=$COMPUTE_HOSTNAME_FORMAT
          if [[ -z $FORMAT ]] ; then
            FORMAT="compute" ;
          else
            # Assumption: only %index% and %stackname% are the variables in Host name format
            FORMAT=$(echo $FORMAT | sed 's/\%index\%//g' | sed 's/\%stackname\%//g') ;
          fi
          if [[ $(hostname) == *$FORMAT* ]] ; then
            # Reorder the tuned unit so that it starts before network.target and
            # openvswitch.service: isolated cores must be in place before OVS starts
            tuned_service=/usr/lib/systemd/system/tuned.service
            grep -q "network.target" $tuned_service
            if [ "$?" -eq 0 ]; then
              sed -i '/After=.*/s/network.target//g' $tuned_service
            fi
            grep -q "Before=.*network.target" $tuned_service
            if [ ! "$?" -eq 0 ]; then
              grep -q "Before=.*" $tuned_service
              if [ "$?" -eq 0 ]; then
                sed -i 's/^\(Before=.*\)/\1 network.target openvswitch.service/g' $tuned_service
              else
                sed -i '/After/i Before=network.target openvswitch.service' $tuned_service
              fi
            fi
            systemctl daemon-reload
          fi
        params:
          $COMPUTE_HOSTNAME_FORMAT: {get_param: ComputeHostnameFormat}
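》 A quick way to confirm the new ordering on a compute node (plain systemctl, nothing specific to this guide):

systemctl show -p After -p Before tuned.service
# Before= should now include network.target and openvswitch.service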
4.3.3. Modify network-environment.yaml
- Add the OVS-DPDK custom resources under resource_registry.
resource_registry:
  # Specify the relative/absolute path to the config files you want to use to override the defaults.
  OS::TripleO::Compute::Net::SoftwareConfig: nic-configs/compute-ovs-dpdk.yaml
  OS::TripleO::Controller::Net::SoftwareConfig: nic-configs/controller.yaml
  OS::TripleO::NodeUserData: first-boot.yaml
  OS::TripleO::NodeExtraConfigPost: post-install.yaml
- Under parameter_defaults, disable the tunnel types (set the value to "") and set the network type to vlan.
NeutronTunnelTypes: ""
NeutronNetworkType: 'vlan'
- Under parameter_defaults, map the physical network to the virtual bridge.
NeutronBridgeMappings: 'dpdk:br-link'
*In section 4.2 (two ports, two OVS-DPDK bridges), the corresponding setting was NeutronBridgeMappings: 'dpdk0:br-link0,dpdk1:br-link1'.*
- Under parameter_defaults, set the OpenStack Networking (neutron) ML2 and OVS VLAN mapping range.
NeutronNetworkVLANRanges: 'dpdk:22:22'
This example sets the VLAN range on the physical network (dpdk).
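》 With this range in place, a provider network on VLAN 22 can be created after deployment. A minimal sketch (the network name dpdk-vlan22 is made up):

openstack network create dpdk-vlan22 \
  --provider-network-type vlan \
  --provider-physical-network dpdk \
  --provider-segment 22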
- Under parameter_defaults, set the OVS-DPDK configuration parameters.
Note: NeutronDpdkCoreList and NeutronDpdkMemoryChannels are required settings. If DPDK is deployed with incorrect values for these parameters, the deployment fails or becomes unstable.
ⅰ Provide the list of CPU cores that can be used as DPDK poll mode drivers (PMDs), in the format [allowed_pattern: "'[0-9,-]+'"].
NeutronDpdkCoreList: "'4,6,20,22'"
OVS-DPDK performance can be optimized with the following options (a short discovery sketch follows this list):
- Choose CPUs associated with the NUMA node of the DPDK interface. Use cat /sys/class/net/<interface>/device/numa_node to list the NUMA node associated with an interface, and use lscpu to list the CPUs associated with that NUMA node.
- With hyper-threading, place CPU siblings in the same group (what is a CPU sibling?). Use cat /sys/devices/system/cpu/<cpu>/topology/thread_siblings_list to query a CPU's siblings.
- Reserve CPU 0 for host processes.
- Isolate the CPUs assigned to the PMDs so that host processes cannot use them. (See HostCpusList below.)
- Use NovaVcpuPinSet to exclude the CPUs assigned to the PMDs from compute scheduling. (See NovaVcpuPinSet below, i.e. the vCPUs that virtual machines may use.)
》 Type 1: used by the DPDK PMDs (NeutronDpdkCoreList); Type 2: used by host processes (HostCpusList); Type 3: used by virtual machines (NovaVcpuPinSet).
》 NovaVcpuPinSet + NeutronDpdkCoreList = HostIsolatedCoreList
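》 A minimal discovery sketch for the checks above (the interface name p1p1 and CPU number 4 are assumptions):

cat /sys/class/net/p1p1/device/numa_node                        # NUMA node of the DPDK NIC
lscpu | grep "NUMA node"                                        # CPUs per NUMA node
cat /sys/devices/system/cpu/cpu4/topology/thread_siblings_list  # siblings of CPU 4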
ⅱ Provide the number of memory channels, in the format [allowed_pattern: "[0-9]+"].
NeutronDpdkMemoryChannels: "4"
ⅲ Set the amount of memory to pre-allocate from the hugepage pool on each CPU socket.
NeutronDpdkSocketMemory: "2048,2048"
This is a comma-separated string in ascending order of CPU socket. The example shows a two-NUMA-node configuration that pre-allocates 2048 MB of hugepage memory on socket 0 and 2048 MB on socket 1. If you have only one NUMA node, set this to 1024,0.
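》 The per-socket 1G hugepage pools can be inspected at runtime through sysfs (a standard kernel path, independent of this guide):

cat /sys/devices/system/node/node*/hugepages/hugepages-1048576kB/nr_hugepages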
ⅳ Set the DPDK driver type and the datapath type.
NeutronDpdkDriverType: "vfio-pci"
NeutronDatapathType: "netdev"
- Under parameter_defaults, set the vhost-user socket directory for OVS.
NeutronVhostuserSocketDir: "/var/run/openvswitch"
- Under parameter_defaults, reserve RAM for host processes.
NovaReservedHostMemory: 2048
- Under parameter_defaults, set a comma-separated list of the physical CPU cores reserved for virtual machine processes.
NovaVcpuPinSet: "8,10,12,14,18,24,26,28,30"
- Under parameter_defaults, list the applicable filters. The Nova scheduler applies these filters in the order they are listed. List the most restrictive filters first so that the filtering process for the nodes runs more efficiently.
NovaSchedulerDefaultFilters: "RamFilter,ComputeFilter,AvailabilityZoneFilter,ComputeCapabilitiesFilter,ImagePropertiesFilter,PciPassthroughFilter,NUMATopologyFilter"
- Under parameter_defaults, add the ComputeKernelArgs parameter so that these arguments are added to the default grub file at first boot.
ComputeKernelArgs: "default_hugepagesz=1GB hugepagesz=1G hugepages=32 iommu=pt intel_iommu=on"
Note: These hugepages are consumed both by the virtual machines and by OVS-DPDK, as set with the NeutronDpdkSocketMemory parameter in this step. The number of hugepages available to the virtual machines is the boot parameter minus NeutronDpdkSocketMemory.
You must also add hw:mem_page_size=1GB to the flavor of any instance that uses DPDK. If this step is missed, the instance will fail to get a DHCP allocation (presumably for lack of hugepage memory?).
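》 For example, with the standard flavor CLI (the flavor name m1.dpdk is made up):

openstack flavor set m1.dpdk --property hw:mem_page_size=1GB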
- Under parameter_defaults, set the range of physical CPU cores to be isolated with tuned. This parameter is consumed by the cpu-partitioning tuned profile described in the tuning documentation in the appendix.
HostIsolatedCoreList: "2,4,6,8,10,12,14,18,20,22,24,26,28,30"
- Under parameter_defaults, set the logical OVS-DPDK core list. These cores must be manually excluded from both the NeutronDpdkCoreList and NovaVcpuPinSet lists. Typically the first physical core on each NUMA node is assigned to these host processes, regardless of the NUMA locality of the DPDK interfaces.
HostCpusList: "'3,5,7,19,21,23'"
4.3.4. Modify controller.yaml
- Create a separate provisioning interface (PXE plane).
-
  type: interface
  name: nic1
  use_dhcp: false
  addresses:
    -
      ip_netmask:
        list_join:
          - '/'
          - - {get_param: ControlPlaneIp}
            - {get_param: ControlPlaneSubnetCidr}
  routes:
    -
      ip_netmask: 169.254.169.254/32
      next_hop: {get_param: EC2MetadataIp}
    -
      default: true
      next_hop: {get_param: ExternalInterfaceDefaultRoute}
- Create a control-plane Linux bond for the isolated networks (OpenStack management plane).
-
  type: linux_bond
  name: bond_api
  bonding_options: "mode=active-backup"
  use_dhcp: false
  dns_servers: {get_param: DnsServers}
  members:
    -
      type: interface
      name: nic2
      primary: true
    -
      type: interface
      name: nic3
- Assign VLANs to the Linux bond.
-
  type: vlan
  vlan_id: {get_param: InternalApiNetworkVlanID}
  device: bond_api
  addresses:
    -
      ip_netmask: {get_param: InternalApiIpSubnet}
-
  type: vlan
  vlan_id: {get_param: TenantNetworkVlanID}
  device: bond_api
  addresses:
    -
      ip_netmask: {get_param: TenantIpSubnet}
-
  type: vlan
  vlan_id: {get_param: StorageNetworkVlanID}
  device: bond_api
  addresses:
    -
      ip_netmask: {get_param: StorageIpSubnet}
-
  type: vlan
  vlan_id: {get_param: StorageMgmtNetworkVlanID}
  device: bond_api
  addresses:
    -
      ip_netmask: {get_param: StorageMgmtIpSubnet}
-
  type: vlan
  vlan_id: {get_param: ExternalNetworkVlanID}
  device: bond_api
  addresses:
    -
      ip_netmask: {get_param: ExternalIpSubnet}
- Create the OVS bridge for access to the compute nodes (service plane).
-
  type: ovs_bridge
  name: br-link
  use_dhcp: false
  members:
    -
      type: interface
      name: nic4
4.3.5. Modify compute.yaml
Copy the default compute.yaml to compute-ovs-dpdk.yaml and make the following changes:
- Create a separate provisioning interface (PXE plane).
-
  type: interface
  name: nic1
  use_dhcp: false
  addresses:
    -
      ip_netmask:
        list_join:
          - '/'
          - - {get_param: ControlPlaneIp}
            - {get_param: ControlPlaneSubnetCidr}
  routes:
    -
      ip_netmask: 169.254.169.254/32
      next_hop: {get_param: EC2MetadataIp}
    -
      default: true
      next_hop: {get_param: ControlPlaneDefaultRoute}
- Create a control-plane Linux bond for the isolated networks (OpenStack management plane).
-
  type: linux_bond
  name: bond_api
  bonding_options: "mode=active-backup"
  use_dhcp: false
  dns_servers: {get_param: DnsServers}
  members:
    -
      type: interface
      name: nic2
      primary: true
    -
      type: interface
      name: nic3
- Assign VLANs to this Linux bond.
-
  type: vlan
  vlan_id: {get_param: InternalApiNetworkVlanID}
  device: bond_api
  addresses:
    -
      ip_netmask: {get_param: InternalApiIpSubnet}
-
  type: vlan
  vlan_id: {get_param: TenantNetworkVlanID}
  device: bond_api
  addresses:
    -
      ip_netmask: {get_param: TenantIpSubnet}
-
  type: vlan
  vlan_id: {get_param: StorageNetworkVlanID}
  device: bond_api
  addresses:
    -
      ip_netmask: {get_param: StorageIpSubnet}
- Set a bridge with two DPDK ports in an OVS-DPDK data plane bond (service plane).
-
  type: ovs_user_bridge
  name: br-link
  use_dhcp: false
  members:
    -
      type: ovs_dpdk_bond
      name: dpdkbond0
      members:
        -
          type: ovs_dpdk_port
          name: dpdk0
          members:
            -
              type: interface
              name: nic4
        -
          type: ovs_dpdk_port
          name: dpdk1
          members:
            -
              type: interface
              name: nic5
*Compared with section 4.2 (two OVS-DPDK data planes, two OVS bridges, two ports), the difference is the added type: ovs_dpdk_bond.*
Note: To include multiple DPDK devices, repeat the type section for each DPDK device you want to add.
Note: When using OVS-DPDK, all bridges on the same compute node should be of type ovs_user_bridge. The director may accept a configuration that mixes the two, but Red Hat OpenStack Platform does not support having both ovs_bridge and ovs_user_bridge on the same node.
4.3.6. Run the overcloud_deploy.sh Script
The following example defines the openstack overcloud deploy command for the OVS-DPDK environment within a bash script:
#!/bin/bash
openstack overcloud deploy --templates \
-e /usr/share/openstack-tripleo-heat-templates/environments/network-isolation.yaml \
-e /usr/share/openstack-tripleo-heat-templates/environments/neutron-ovs-dpdk.yaml \
-e /home/stack/ospd-10-vlan-ovs-dpdk-bonding-dataplane-bonding-ctlplane/network-environment.yaml
The last line above differs from section 4.2.
/usr/share/openstack-tripleo-heat-templates/environments/neutron-ovs-dpdk.yaml is the location of the default neutron-ovs-dpdk.yaml file, which enables the OVS-DPDK parameters for the compute nodes. /home/stack/<relative-directory>/network-environment.yaml is the path to the network-environment.yaml file; use this file to override the default values in neutron-ovs-dpdk.yaml.
Note: After deploying the overcloud, you must reboot the compute nodes to enforce the tuned profile.
Note: This OVS-DPDK configuration does not support security groups or live migration.
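》 Once the compute nodes have rebooted, the settings written by first-boot.yaml can be double-checked on a compute node with standard OVS commands (a verification sketch, not part of the deploy script):

ovs-vsctl get Open_vSwitch . other_config
# expect dpdk-init=true plus the socket-mem and cpu-mask values set above
ovs-appctl dpif-netdev/pmd-stats-show
# per-PMD statistics once the dpdkbond0 ports are up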