This article is mainly based on [Multi-GPU basics](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/6_MultiGPU/multigpu_basics.ipynb).
- Import the required modules
import tensorflow as tf
import numpy as np
import datetime
# Log which device each operation is placed on
log_device_placement = True
# Number of matrix multiplications to perform
n = 10

# Define matrix power: matpow(M, n) computes M^n
def matpow(M, n):
    if n <= 1:  # Base case: M^1 is M itself
        return M
    else:
        return tf.matmul(M, matpow(M, n - 1))
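
As a quick sanity check (a minimal sketch with a hypothetical small matrix, not part of the original example), matpow should agree with NumPy's matrix_power:

# Verify matpow against NumPy on a small hypothetical input
M_small = np.random.rand(4, 4).astype('float32')
with tf.Session() as sess:
    tf_result = sess.run(matpow(tf.constant(M_small), 3))
np_result = np.linalg.matrix_power(M_small, 3)
print(np.allclose(tf_result, np_result, atol=1e-4))  # expected: True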
- Build the graph
# Example: compute A^n + B^n on 2 GPUs
# Create large random matrices (np.random.rand requires integer dimensions)
A = np.random.rand(10000, 10000).astype('float32')
B = np.random.rand(10000, 10000).astype('float32')
# Lists to collect the per-device result ops
c1 = []
c2 = []
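
The multi-GPU version below assumes at least two GPUs are visible to TensorFlow. A quick way to check (a short sketch using TF's device_lib; the variable names are illustrative) is:

from tensorflow.python.client import device_lib

# List the GPU devices TensorFlow can see
gpus = [d.name for d in device_lib.list_local_devices() if d.device_type == 'GPU']
print(gpus)  # e.g. ['/device:GPU:0', '/device:GPU:1']

If fewer than two GPUs are found, adding allow_soft_placement=True to the ConfigProto below lets TensorFlow fall back to an available device instead of raising an error.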
- Single GPU:
# Single GPU computing
with tf.device('/gpu:0'):
    a = tf.constant(A)
    b = tf.constant(B)
    # Compute A^n and B^n and store the results in c1
    c1.append(matpow(a, n))
    c1.append(matpow(b, n))

with tf.device('/cpu:0'):
    sum_op = tf.add_n(c1)  # Add all the elements in c1, i.e. A^n + B^n

t1_1 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Run the op
    sess.run(sum_op)
t2_1 = datetime.datetime.now()
- Multi-GPU:
# Multi GPU computing
# GPU:0 computes A^n
with tf.device('/gpu:0'):
    # Compute A^n and store the result in c2
    a = tf.constant(A)
    c2.append(matpow(a, n))

# GPU:1 computes B^n
with tf.device('/gpu:1'):
    # Compute B^n and store the result in c2
    b = tf.constant(B)
    c2.append(matpow(b, n))

with tf.device('/cpu:0'):
    sum_op = tf.add_n(c2)  # Add all the elements in c2, i.e. A^n + B^n

t1_2 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Run the op
    sess.run(sum_op)
t2_2 = datetime.datetime.now()
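
To produce the timing output below, print the two deltas (the original notebook reports them in the same format):

print("Single GPU computation time: " + str(t2_1 - t1_1))
print("Multi GPU computation time: " + str(t2_2 - t1_2))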
- Test results:
Single GPU computation time: 0:00:11.833497
Multi GPU computation time: 0:00:07.085913
As the results show, running the computation on multiple GPUs in parallel improves the running speed.
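
The same pattern extends to more devices: place each independent sub-computation on its own GPU and reduce the partial results on the CPU. A minimal sketch (num_gpus and the round-robin placement are assumptions, not part of the original example):

num_gpus = 2  # assumption: set to the number of GPUs on your machine
matrices = [A, B]  # independent sub-computations
results = []
for i, M in enumerate(matrices):
    # Round-robin each matrix power onto a GPU
    with tf.device('/gpu:%d' % (i % num_gpus)):
        results.append(matpow(tf.constant(M), n))
with tf.device('/cpu:0'):
    total = tf.add_n(results)  # reduce on the CPU

This only pays off when the per-device work is large enough to outweigh the cost of transferring the partial results back to the CPU for the final addition.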