環境
ubuntu 16.04
Mellanox Technologies MT26428 [ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE] (rev b0)
omnisky@omnisky:~$ lspci | grep Mell
01:00.0 InfiniBand: Mellanox Technologies MT26428 [ConnectX VPI PCIe 2.0 5GT/s - IB QDR / 10GigE] (rev b0)
下載驅動
http://cn.mellanox.com/page/products_dyn?product_family=26&mtag=linux_sw_drivers
安裝驅動
ISO格式
掛載鏡像
mount -o ro,loop MLNX_OFED_LINUX-2.1-1.0.0-rhel6.4-x86_64.iso /mnt
在/mnt目錄下執行如下命令:
./mlnxofedinstall
tgz格式
tar -zxvf MLNX_OFED_LINUX-4.5-1.0.1.0-ubuntu16.04-x86_64.tgz
# 進入
./mlnxofedinstall --force
# 安裝完畢,加載驅動
/etc/init.d/openibd restart
配置IB
成功安裝IB驅動之後,使用命令 ifconfig,可以看見 ib0
vi /etc/network/interfaces,配置IB的IP
重啟網絡
ifconfig
# 顯示
ib0 Link encap:UNSPEC HWaddr A0-00-02-20-FE-80-00-00-00-00-00-00-00-00-00-00
inet addr:11.11.11.11 Bcast:11.11.11.255 Mask:255.255.255.0
inet6 addr: fe80::202:c903:52:bdd9/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:2044 Metric:1
RX packets:31 errors:0 dropped:0 overruns:0 frame:0
TX packets:47 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:256
RX bytes:3783 (3.7 KB) TX bytes:5153 (5.1 KB)
啟動相關服務
# 主節點啟動
/etc/init.d/openibd restart
/etc/init.d/opensmd restart
# 其他節點
/etc/init.d/openibd restart
如果配置成功,則如下所示
root@master:/home/omnisky# ibstat
CA 'mlx4_0'
CA type: MT26428
Number of ports: 1
Firmware version: 2.9.1200
Hardware version: b0
Node GUID: 0x0002c9030052bdd8
System image GUID: 0x0002c9030052bddb
Port 1:
State: Active
Physical state: LinkUp
Rate: 40
Base lid: 1
LMC: 0
SM lid: 1
Capability mask: 0x0251086a
Port GUID: 0x0002c9030052bdd9
Link layer: InfiniBand
測試IB
# server
ib_write_bw -a -d mlx4_0
# client
ib_write_bw -a -F $server_IP -d mlx4_0 --report_gbits
# mlx4_0 通過ibstat查詢
如果正常
[root@mofed-test-pod1 /]# ib_write_bw -a -F 10.244.1.171 -d mlx4_0 --report_gbits
---------------------------------------------------------------------------------------
RDMA_Write BW Test
Dual-port : OFF Device : mlx4_0
Number of qps : 1 Transport type : IB
Connection type : RC Using SRQ : OFF
TX depth : 128
CQ Moderation : 100
Mtu : 4096[B]
Link type : IB
Max inline data : 0[B]
rdma_cm QPs : OFF
Data ex. method : Ethernet
---------------------------------------------------------------------------------------
local address: LID 0x01 QPN 0x0218 PSN 0xa65d9c RKey 0x001100 VAddr 0x007f392cbec000
remote address: LID 0x03 QPN 0x0218 PSN 0xdad5f RKey 0x001100 VAddr 0x007ff453bc3000
---------------------------------------------------------------------------------------
#bytes #iterations BW peak[Gb/sec] BW average[Gb/sec] MsgRate[Mpps]
2 5000 0.054750 0.052885 3.305343
4 5000 0.15 0.13 4.134340
8 5000 0.30 0.30 4.673526
16 5000 0.60 0.60 4.654721
32 5000 1.20 1.18 4.594563
64 5000 2.39 2.32 4.526180
128 5000 4.79 4.78 4.669972
256 5000 9.56 9.54 4.658179
512 5000 19.00 18.76 4.580314
1024 5000 23.97 23.93 2.921618
2048 5000 25.29 25.23 1.539684
4096 5000 26.32 26.31 0.803058
8192 5000 26.85 26.84 0.409597
16384 5000 27.09 27.09 0.206665
32768 5000 27.21 27.21 0.103811
65536 5000 27.28 27.28 0.052026
131072 5000 27.37 27.28 0.026016
262144 5000 27.28 27.28 0.013009
524288 5000 27.38 27.30 0.006509
1048576 5000 27.34 27.33 0.003258
2097152 5000 27.34 27.33 0.001629
4194304 5000 27.33 27.32 0.000814
8388608 5000 27.32 27.32 0.000407
---------------------------------------------------------------------------------------
k8s rdma插件安裝以及測試
環境
root@master:/home/omnisky# kubectl get nodes -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
master Ready master 48m v1.13.3 192.168.207.122 <none> Ubuntu 16.04.5 LTS 4.15.0-45-generic docker://18.6.1
omnisky Ready <none> 46m v1.13.3 192.168.207.124 <none> Ubuntu 16.04.5 LTS 4.15.0-45-generic docker://18.6.1
安裝插件
git clone https://github.com/Mellanox/k8s-rdma-sriov-dev-plugin.git
# 進入該文件夾
# Create config map to describe mode as "hca" mode. This is per node configuration.
kubectl create -f example/hca/rdma-hca-node-config.yaml
# Deploy device plugin
kubectl create -f example/device-plugin.yaml
啟動容器測試
# Create test pod which requests 1 vhca resource.
kubectl create -f example/hca/test-hca-pod.yaml
複製文件test-hca-pod.yaml
修改文件name,同時指定nodeName,確保pod運行在不同節點
apiVersion: v1
kind: Pod
metadata:
  name: mofed-test-pod1
spec:
  restartPolicy: OnFailure
  nodeName: master
  containers:
  - image: mellanox/centos_7_4_mofed_4_2_1_2_0_0_60
    name: mofed-test-ctr
    securityContext:
      capabilities:
        add: [ "IPC_LOCK" ]
    resources:
      limits:
        rdma/hca: 1
    command:
    - sh
    - -c
    - |
      ls -l /dev/infiniband /sys/class/net
      sleep 1000000
kubectl create -f example/hca/test_custom.yaml
# 兩個不同節點的pods
root@master:/home/omnisky/ty/k8s-rdma-sriov-dev-plugin/example/hca# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
mofed-test-pod 1/1 Running 0 49m 10.244.1.171 omnisky <none> <none>
mofed-test-pod1 1/1 Running 0 49m 10.244.0.4 master <none> <none>
分別進入兩個容器
kubectl exec -it pod_name bash
ibstat 的結果正常
一個做服務端,一個做客戶端,測試
# server
ib_write_bw -a -d mlx4_0
# client
ib_write_bw -a -F $server_IP -d mlx4_0 --report_gbits
# mlx4_0 通過ibstat查詢