之前經(jīng)常使用golang測試框架中的單元測試拴泌,一直沒用性能測試足绅,今天想熟悉一下golang的Benchmark順便給堆排和快排做個性能測試间聊,測試非常簡單,源代碼如下:
//sort.go
package mysort
import (
"math/rand"
"time"
)
func swap(nums []int, i, j int) {
nums[i], nums[j] = nums[j], nums[i]
}
func parition(nums []int, start, end int) int {
idx := rand.Int()%(end-start) + 1 + start
swap(nums, idx, end)
idx = end
for start < end {
for nums[start] <= nums[idx] && start < end {
start++
}
for nums[end] >= nums[idx] && start < end {
end--
}
swap(nums, start, end)
}
swap(nums, start, idx)
return start
}
//quick sort
func QSort(nums []int, start, end int) {
rand.Seed(time.Now().UnixNano())
if start < end {
p := parition(nums, start, end)
QSort(nums, start, p-1)
QSort(nums, p+1, end)
}
}
//生成一個隨機的數(shù)組旁瘫,長度為len, 元素最大值不超過max
func GenRandSlice(len, max int) []int {
rand.Seed(time.Now().UnixNano())
a := make([]int, 0)
for i := 0; i < len; i++ {
a = append(a, rand.Int()%max)
}
return a
}
//堆排序
func left(i int) int {
return i << 1
}
func right(i int) int {
return i<<1 + 1
}
func maxHeapify(a []int, i int) {
l := left(i)
r := right(i)
max := i
aLen := len(a)
if l < aLen && a[l] > a[max] {
max = l
}
if r < aLen && a[r] > a[max] {
max = r
}
if max != i {
swap(a, i, max)
maxHeapify(a, max)
}
}
func BuildMaxHeap(a []int) {
aLen := len(a)
if aLen == 0 {
return
}
for i := aLen/2 - 1; i >= 0; i-- {
maxHeapify(a, i)
}
}
func HeapSort(a []int) {
BuildMaxHeap(a)
aLen := len(a)
tmp := a[:]
for i := aLen - 1; i >= 1; i-- {
swap(tmp, 0, i)
tmp = tmp[:len(tmp)-1]
maxHeapify(tmp, 0)
}
}
測試文件為:
//sort_test.go
import (
"testing"
)
func BenchmarkHeapSort(b *testing.B) {
a := GenRandSlice(10000, 10000)
for i := 0; i < b.N; i++ {
HeapSort(a)
}
}
func BenchmarkQSort(b *testing.B) {
a := GenRandSlice(10000, 10000)
for i := 0; i < b.N; i++ {
QSort(a, 0, len(a)-1)
}
}
測試命令:
go test -bench=.
goos: darwin
goarch: amd64
pkg: go_practice/algorithm/mysort
BenchmarkHeapSort-4 2000 914686 ns/op
BenchmarkQSort-4 10 120658646 ns/op
PASS
ok go_practice/algorithm/mysort 3.269s
每ns快速排序執(zhí)行的操作遠(yuǎn)遠(yuǎn)高于堆排祖凫,相比較來說,快排確實高效酬凳。另外惠况,goalng的testing真是好用,各種想要的功能都有粱年。性能測試了售滤,還可以對cpu和mem做進(jìn)一步分析,詳細(xì)的指令可查看:
go test -h
這里只截取一部分
-cpuprofile cpu.out
Write a CPU profile to the specified file before exiting.
Writes test binary as -c would.
-memprofile mem.out
Write an allocation profile to the file after all tests have passed.
Writes test binary as -c would.
-memprofilerate n
Enable more precise (and expensive) memory allocation profiles by
setting runtime.MemProfileRate. See 'go doc runtime.MemProfileRate'.
To profile all memory allocations, use -test.memprofilerate=1.
-mutexprofile mutex.out
Write a mutex contention profile to the specified file
when all tests are complete.
Writes test binary as -c would.
-mutexprofilefraction n
Sample 1 in n stack traces of goroutines holding a
contended mutex.
-outputdir directory
Place output files from profiling in the specified directory,
by default the directory in which "go test" is running.
-trace trace.out
Write an execution trace to the specified file before exiting.
如執(zhí)行命令 go test -test.bench="BenchmarkHeapSort" -cpuprofile cpu.out
,會得到兩個文件:
cpu.out mysort.test
cpu.out是cpu采樣結(jié)果台诗,mysort.test是測試的二進(jìn)制文件完箩,使用命令go tool pprof mysort.test cpu.out
可得到如下結(jié)果:
File: mysort.test
Type: cpu
Time: Feb 17, 2019 at 12:55pm (CST)
Duration: 2.06s, Total samples = 1.67s (80.90%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top10
Showing nodes accounting for 1.67s, 100% of 1.67s total
Showing top 10 nodes out of 16
flat flat% sum% cum cum%
1.06s 63.47% 63.47% 1.38s 82.63% go_practice/algorithm/mysort.maxHeapify
0.30s 17.96% 81.44% 0.30s 17.96% go_practice/algorithm/mysort.swap (inline)
0.12s 7.19% 88.62% 0.12s 7.19% runtime.newstack
0.08s 4.79% 93.41% 0.08s 4.79% go_practice/algorithm/mysort.left (inline)
0.05s 2.99% 96.41% 1.50s 89.82% go_practice/algorithm/mysort.HeapSort
0.04s 2.40% 98.80% 0.04s 2.40% runtime.nanotime
0.01s 0.6% 99.40% 0.14s 8.38% go_practice/algorithm/mysort.BuildMaxHeap
0.01s 0.6% 100% 0.01s 0.6% runtime.kevent
0 0% 100% 1.50s 89.82% go_practice/algorithm/mysort.BenchmarkHeapSort
0 0% 100% 0.12s 7.19% runtime.morestack
再對QSort
做測試:
go test -test.bench="BenchmarkQSort" -cpuprofile cpu.out
go tool pprof mysort.test cpu.out
File: mysort.test
Type: cpu
Time: Feb 17, 2019 at 12:58pm (CST)
Duration: 1.45s, Total samples = 1.16s (79.90%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top10
Showing nodes accounting for 1.16s, 100% of 1.16s total
Showing top 10 nodes out of 20
flat flat% sum% cum cum%
0.80s 68.97% 68.97% 0.80s 68.97% math/rand.seedrand (inline)
0.25s 21.55% 90.52% 1.05s 90.52% math/rand.(*rngSource).Seed
0.05s 4.31% 94.83% 0.05s 4.31% runtime.nanotime
0.03s 2.59% 97.41% 0.03s 2.59% runtime.walltime
0.02s 1.72% 99.14% 0.02s 1.72% runtime.usleep
0.01s 0.86% 100% 0.01s 0.86% runtime.kevent
0 0% 100% 1.08s 93.10% go_practice/algorithm/mysort.BenchmarkQSort
0 0% 100% 1.08s 93.10% go_practice/algorithm/mysort.QSort
0 0% 100% 1.05s 90.52% math/rand.(*Rand).Seed
0 0% 100% 1.05s 90.52% math/rand.(*lockedSource).seedPos