……continuing A Complete Collection of Efficient Fibonacci Sequence Solutions with Time Complexity Analysis, Part 8
These days, when home PCs and even phones come with a pile of cores to count,
the obvious way to speed things up is parallel computing.
Take the "GMP built-in fib2 function solution for generating the sequence" from Section 14 in Part 7 of this series.
It generates the Fibonacci sequence from 0 to n by computing one number, appending it to the result list, and then computing the next.
That work could just as well be spread over the CPU's cores, several numbers at a time; with four cores, four numbers at once. So much nicer and faster. Perfect.
That is a multi-process parallel algorithm distributed across multiple cores.
Will four cores be four times as fast as one core? In other words, can the speedup reach 4?
Sticking with the four-core example: if the achieved speedup is 4, the parallel efficiency is 100%.
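Put as code, parallel efficiency is just the measured speedup divided by the number of cores. A minimal sketch (the timings fed in below are made-up placeholders, and the function name is mine, not from the series):

def parallel_efficiency(serial_seconds: float, parallel_seconds: float, cores: int) -> float:
    speedup = serial_seconds / parallel_seconds  # how many times faster the parallel run is
    return speedup / cores                       # fraction of the ideal linear speedup

print(parallel_efficiency(100.0, 25.0, 4))  # the ideal case: a speedup of 4 on 4 cores gives 1.0, i.e. 100%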
Well, that is the ideal case; in practice you fall far short of it.
The reason is that once the job goes multi-core and multi-process, the main process has to split the big task into several subtasks (child processes), hand them out to the cores, and then gather the results back. That brings extra overhead: task decomposition, task scheduling, argument passing, child-process context switches, inter-process data transfer, inter-process locking, result merging, and so on. This overhead is substantial and drags the efficiency down quite a bit.
For generating the Fibonacci sequence in parallel, the biggest single overhead should be inter-process data transfer: the child processes have to hand truly huge amounts of data back to the main process.
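To make that overhead concrete, here is a minimal sketch (the worker and variable names are mine, not from the series) in which a child process returns a single large gmpy2 number through a multiprocessing.Queue: the whole value has to be pickled in the child, copied across the process boundary, and unpickled in the parent.

import multiprocessing as mp

import gmpy2


def queue_worker(q) -> None:
    # Returning through a Queue pickles the number in the child and copies it to the parent.
    q.put(gmpy2.fib(1000000))


if __name__ == '__main__':
    q = mp.Queue()
    p = mp.Process(target=queue_worker, args=(q,))
    p.start()
    big = q.get()            # drain the queue before join() so its feeder thread can finish
    p.join()
    print(big.num_digits())  # about 209,000 decimal digits crossed the process boundary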
On a single machine, as everyone knows, shared memory is usually the fastest way to do that.
You could even dress it up with the fashionable term "in-memory computing".
So below I implement a shared-memory parallel algorithm (first, a tiny sketch of the primitive it rests on).
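A minimal sketch of that primitive, before the full listing: a multiprocessing.RawArray lives in shared memory, so a child process writes into it in place and the parent reads the very same bytes back with no pickling at all (the names here are just for illustration).

import multiprocessing as mp
import ctypes


def child_writes(buf) -> None:
    # Write directly into the shared buffer; nothing has to be sent back on return.
    for i in range(len(buf)):
        buf[i] = i * i


if __name__ == '__main__':
    shared = mp.RawArray(ctypes.c_ulonglong, 8)  # eight unsigned 64-bit slots in shared memory
    p = mp.Process(target=child_writes, args=(shared,))
    p.start()
    p.join()
    print(list(shared))  # the parent sees the child's writes: [0, 1, 4, 9, 16, 25, 36, 49]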
17. Shared-memory parallel solution
import time
import multiprocessing as mp
import ctypes
import array
import gmpy2
def fib2_processes(shared_array_for_fib_seq_results: 'array ctypes.c_ulonglong', element_length_record: 'array ctypes.c_ulonglong', write_protection_pointer: 'ctypes.c_longlong', unread_quantity: 'ctypes.c_ulonglong', shared_array_overflow: 'ctypes.c_bool', mutexlock: 'Lock', n_iterable: 'iterable') -> None:
    start_time = time.process_time_ns()
    Fib_seq_array_size = len(shared_array_for_fib_seq_results)
    sizeof_c_ulonglong = ctypes.sizeof(ctypes.c_ulonglong)
    write_pointer = 0
    for n in n_iterable:
        for element in gmpy2.fib2(n):  # fib2(n) returns the pair (fib(n), fib(n-1))
            element_bytes = gmpy2.to_binary(element)[2:]  # strip to_binary()'s 2-byte header; what remains are the value's bytes in little-endian order
            element_byte_length = len(element_bytes)
            element_length = (element_byte_length >> 3) + (0 if element_byte_length & 0b111 == 0 else 1)  # length in 8-byte c_ulonglong slots, rounded up
            next_write_pointer = write_pointer + element_length
            while shared_array_overflow.value and (next_write_pointer > write_protection_pointer.value):
                pass  # While the overflow flag is set, spin-wait whenever the write range would pass the write-protection pointer.
            if next_write_pointer > Fib_seq_array_size:  # Wrap around to the start of the ring buffer and flag the overflow.
                shared_array_overflow.value = True
                write_pointer = 0
                next_write_pointer = element_length
            element_length_record[n] = element_length
            with mutexlock:
                shared_array_for_fib_seq_results[write_pointer: next_write_pointer] = array.array('Q', element_bytes + (b'\x00' * (element_length * sizeof_c_ulonglong - element_byte_length)))
                unread_quantity.value += 1
            write_pointer = next_write_pointer
            n -= 1  # the second element of the pair belongs at index n - 1
    end_time = time.process_time_ns()
    print('Computing and serializing took %.1f seconds.' % ((end_time - start_time) / 10 ** 9))
    return None
def Fibonacci_sequence_21(n: int) -> list:  # The parameter n means: generate the Fibonacci sequence up to the n-th term
    'Multi-process shared-memory solution returning a list, using the GMP built-in fib2 function'
    assert isinstance(n, int), 'n must be an integer.'
    if n >= 0:
        start_time = time.process_time_ns()
        Number_of_processes = max(mp.cpu_count() - 1, 1)
        size_of_shared_variables_for_fib_processes = 1 * (1024 ** 2) // ctypes.sizeof(ctypes.c_ulonglong) // Number_of_processes  # Shared-buffer budget: the leading 1 is in MB, converted to c_ulonglong slots and split across the worker processes. For very large n this size must be increased accordingly.
        list_of_shared_array_for_fib_seq_results = []
        list_of_element_length_record = []
        list_of_write_protection_pointer = []
        list_of_unread_quantity = []
        list_of_shared_array_overflow = []
        list_of_mutexlock = []
        element_length_record = mp.RawArray(ctypes.c_ulonglong, n + 1)
        for i in range(Number_of_processes):
            list_of_shared_array_for_fib_seq_results.append(mp.RawArray(ctypes.c_ulonglong, size_of_shared_variables_for_fib_processes))
            list_of_element_length_record.append(element_length_record)
            list_of_write_protection_pointer.append(mp.RawValue(ctypes.c_longlong, -1))
            list_of_unread_quantity.append(mp.RawValue(ctypes.c_ulonglong, 0))
            list_of_shared_array_overflow.append(mp.RawValue(ctypes.c_bool, False))
            list_of_mutexlock.append(mp.Lock())
        list_of_n_iterable_for_fib2_processes = [range(n - i * 2, 0, -(Number_of_processes * 2)) for i in range(Number_of_processes)]  # each worker gets every Number_of_processes-th pair of indices, counting down from n
        fib2_process_list = [None] * Number_of_processes
        for i in range(Number_of_processes):
            fib2_process_list[i] = mp.Process(target=fib2_processes, args=(list_of_shared_array_for_fib_seq_results[i], list_of_element_length_record[i], list_of_write_protection_pointer[i], list_of_unread_quantity[i], list_of_shared_array_overflow[i], list_of_mutexlock[i], list_of_n_iterable_for_fib2_processes[i]))
            fib2_process_list[i].start()
        fib_list = [None] * (n + 1)
        n_list_for_fib2_processes = []
        for n_iterable in list_of_n_iterable_for_fib2_processes:
            n_list_for_fib2_processes.append(list(n_iterable))
        mv_list_of_results = []
        for i in range(Number_of_processes):
            mv_list_of_results.append(memoryview(list_of_shared_array_for_fib_seq_results[i]))
        list_of_pointers_to_n_list = [0] * Number_of_processes
        list_of_number_of_reciprocating = [0] * Number_of_processes
        list_of_read_pointer = [0] * Number_of_processes
        while True:
            if mp.active_children() == []:  # Once no child process is left alive, run the next check to decide whether to end the loop
                for unread_quantity in list_of_unread_quantity:
                    if unread_quantity.value != 0: break
                else:  # End the loop once every process's unread count has dropped to zero
                    break
            for i in range(Number_of_processes):
                if list_of_unread_quantity[i].value != 0:
                    n_for_fib2_processes = n_list_for_fib2_processes[i][list_of_pointers_to_n_list[i]] - list_of_number_of_reciprocating[i]
                    next_read_pointer = list_of_read_pointer[i] + list_of_element_length_record[i][n_for_fib2_processes]
                    if next_read_pointer > size_of_shared_variables_for_fib_processes:  # the writer wrapped around, so wrap the read pointer too and clear the overflow flag
                        list_of_read_pointer[i] = 0
                        next_read_pointer = list_of_element_length_record[i][n_for_fib2_processes]
                        list_of_shared_array_overflow[i].value = False
                    fib_list[n_for_fib2_processes] = int.from_bytes(mv_list_of_results[i][list_of_read_pointer[i]: next_read_pointer], 'little')
                    with list_of_mutexlock[i]:
                        list_of_unread_quantity[i].value -= 1
                        list_of_read_pointer[i] = next_read_pointer
                        list_of_write_protection_pointer[i].value = next_read_pointer
                    list_of_pointers_to_n_list[i] += list_of_number_of_reciprocating[i]
                    list_of_number_of_reciprocating[i] ^= 1  # alternate between fib(m) and fib(m-1) within each pair
        if n & 1 == 0:
            fib_list[0] = 0  # when n is even, no fib2() pair produces index 0, so fill it in directly
        end_time = time.process_time_ns()
        print('The main process took %.1f seconds.' % ((end_time - start_time) / 10 ** 9))
        return fib_list
    else:
        return None
if __name__ == '__main__':
    start_time = time.perf_counter()
    Fibonacci_sequence_21(1000000)
    end_time = time.perf_counter()
    print('Total time %.1f seconds.' % (end_time - start_time))
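Before the big timing run I sanity-check the result on a small n (my own addition, not part of the benchmark below; the module name fib_shared.py is hypothetical, assuming the listing above is saved under it):

import fib_shared  # hypothetical module name for the listing above

if __name__ == '__main__':
    fibs = fib_shared.Fibonacci_sequence_21(20)
    assert fibs[:11] == [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55]  # the first Fibonacci numbers
    assert fibs[20] == 6765
    print('sanity check passed')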
On my four-core PC with 16 GB of physical RAM, generating the Fibonacci sequence up to n = 1,000,000 (the data occupies about 40 GB in total, so virtual memory is involved) took:
1134 seconds
The single-process solution from Section 14, for the same one million terms, took:
1857 seconds
That is a 38.9% reduction in running time.
But this is 4-core parallelism, so the parallel efficiency is only 40.9% ((1857/1134)/4 = 0.409). A fairly middling parallel algorithm.
Now, experts out there: take the single-process solution from Section 14 as the baseline and try writing your own high-efficiency parallel solution.
I'm putting up prizes. Under the conditions above, a parallel efficiency > 50% wins ¥64.
Parallel efficiency > 60%: ¥128.
Parallel efficiency > 70%: ¥256.
Parallel efficiency > 80%: ¥512.
Parallel efficiency > 90%: ¥1024.
That is the cap; nothing beyond it.
Please leave a comment below, and send me a private message.
To wrap up: hey, likes and tips are welcome.
To be continued……