快排和堆排性能对比

之前经常使用golang测试框架中的单元测试，一直没用性能测试，今天想熟悉一下golang的Benchmark顺便给堆排和快排做个性能测试，测试非常简单，源代码如下：

//sort.go
package mysort

import (
    "math/rand"
    "time"
)

func swap(nums []int, i, j int) {
    nums[i], nums[j] = nums[j], nums[i]
}

func parition(nums []int, start, end int) int {
    idx := rand.Int()%(end-start) + 1 + start
    swap(nums, idx, end)
    idx = end
    for start < end {
        for nums[start] <= nums[idx] && start < end {
            start++
        }
        for nums[end] >= nums[idx] && start < end {
            end--
        }
        swap(nums, start, end)
    }
    swap(nums, start, idx)
    return start
}

//quick sort
func QSort(nums []int, start, end int) {
    rand.Seed(time.Now().UnixNano())
    if start < end {
        p := parition(nums, start, end)
        QSort(nums, start, p-1)
        QSort(nums, p+1, end)
    }
}

//生成一个随机的数组，长度为len, 元素最大值不超过max
func GenRandSlice(len, max int) []int {
    rand.Seed(time.Now().UnixNano())
    a := make([]int, 0)
    for i := 0; i < len; i++ {
        a = append(a, rand.Int()%max)
    }
    return a
}

//堆排序
func left(i int) int {
    return i << 1
}

func right(i int) int {
    return i<<1 + 1
}

func maxHeapify(a []int, i int) {
    l := left(i)
    r := right(i)
    max := i
    aLen := len(a)
    if l < aLen && a[l] > a[max] {
        max = l
    }
    if r < aLen && a[r] > a[max] {
        max = r
    }
    if max != i {
        swap(a, i, max)
        maxHeapify(a, max)
    }
}

func BuildMaxHeap(a []int) {
    aLen := len(a)
    if aLen == 0 {
        return
    }
    for i := aLen/2 - 1; i >= 0; i-- {
        maxHeapify(a, i)
    }
}

func HeapSort(a []int) {
    BuildMaxHeap(a)
    aLen := len(a)
    tmp := a[:]
    for i := aLen - 1; i >= 1; i-- {
        swap(tmp, 0, i)
        tmp = tmp[:len(tmp)-1]
        maxHeapify(tmp, 0)
    }
}

测试文件为：

//sort_test.go
import (
    "testing"
)

func BenchmarkHeapSort(b *testing.B) {
    a := GenRandSlice(10000, 10000)
    for i := 0; i < b.N; i++ {
        HeapSort(a)
    }
}

func BenchmarkQSort(b *testing.B) {
    a := GenRandSlice(10000, 10000)
    for i := 0; i < b.N; i++ {
        QSort(a, 0, len(a)-1)
    }
}

测试命令：

go test -bench=.

goos: darwin
goarch: amd64
pkg: go_practice/algorithm/mysort
BenchmarkHeapSort-4         2000        914686 ns/op
BenchmarkQSort-4              10     120658646 ns/op
PASS
ok      go_practice/algorithm/mysort    3.269s

每ns快速排序执行的操作远远高于堆排，相比较来说，快排确实高效。另外，goalng的testing真是好用，各种想要的功能都有。性能测试了，还可以对cpu和mem做进一步分析，详细的指令可查看：

go test -h

这里只截取一部分

-cpuprofile cpu.out
        Write a CPU profile to the specified file before exiting.
        Writes test binary as -c would.

    -memprofile mem.out
        Write an allocation profile to the file after all tests have passed.
        Writes test binary as -c would.

    -memprofilerate n
        Enable more precise (and expensive) memory allocation profiles by
        setting runtime.MemProfileRate. See 'go doc runtime.MemProfileRate'.
        To profile all memory allocations, use -test.memprofilerate=1.

    -mutexprofile mutex.out
        Write a mutex contention profile to the specified file
        when all tests are complete.
        Writes test binary as -c would.

    -mutexprofilefraction n
        Sample 1 in n stack traces of goroutines holding a
        contended mutex.

    -outputdir directory
        Place output files from profiling in the specified directory,
        by default the directory in which "go test" is running.

    -trace trace.out
        Write an execution trace to the specified file before exiting.

如执行命令 go test -test.bench="BenchmarkHeapSort" -cpuprofile cpu.out ,会得到两个文件：

cpu.out mysort.test cpu.out是cpu采样结果，mysort.test是测试的二进制文件，使用命令 go tool pprof mysort.test cpu.out 可得到如下结果：

File: mysort.test
Type: cpu
Time: Feb 17, 2019 at 12:55pm (CST)
Duration: 2.06s, Total samples = 1.67s (80.90%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top10
Showing nodes accounting for 1.67s, 100% of 1.67s total
Showing top 10 nodes out of 16
      flat  flat%   sum%        cum   cum%
     1.06s 63.47% 63.47%      1.38s 82.63%  go_practice/algorithm/mysort.maxHeapify
     0.30s 17.96% 81.44%      0.30s 17.96%  go_practice/algorithm/mysort.swap (inline)
     0.12s  7.19% 88.62%      0.12s  7.19%  runtime.newstack
     0.08s  4.79% 93.41%      0.08s  4.79%  go_practice/algorithm/mysort.left (inline)
     0.05s  2.99% 96.41%      1.50s 89.82%  go_practice/algorithm/mysort.HeapSort
     0.04s  2.40% 98.80%      0.04s  2.40%  runtime.nanotime
     0.01s   0.6% 99.40%      0.14s  8.38%  go_practice/algorithm/mysort.BuildMaxHeap
     0.01s   0.6%   100%      0.01s   0.6%  runtime.kevent
         0     0%   100%      1.50s 89.82%  go_practice/algorithm/mysort.BenchmarkHeapSort
         0     0%   100%      0.12s  7.19%  runtime.morestack

再对 QSort 做测试：

go test -test.bench="BenchmarkQSort" -cpuprofile cpu.out

go tool pprof mysort.test cpu.out

File: mysort.test
Type: cpu
Time: Feb 17, 2019 at 12:58pm (CST)
Duration: 1.45s, Total samples = 1.16s (79.90%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top10
Showing nodes accounting for 1.16s, 100% of 1.16s total
Showing top 10 nodes out of 20
      flat  flat%   sum%        cum   cum%
     0.80s 68.97% 68.97%      0.80s 68.97%  math/rand.seedrand (inline)
     0.25s 21.55% 90.52%      1.05s 90.52%  math/rand.(*rngSource).Seed
     0.05s  4.31% 94.83%      0.05s  4.31%  runtime.nanotime
     0.03s  2.59% 97.41%      0.03s  2.59%  runtime.walltime
     0.02s  1.72% 99.14%      0.02s  1.72%  runtime.usleep
     0.01s  0.86%   100%      0.01s  0.86%  runtime.kevent
         0     0%   100%      1.08s 93.10%  go_practice/algorithm/mysort.BenchmarkQSort
         0     0%   100%      1.08s 93.10%  go_practice/algorithm/mysort.QSort
         0     0%   100%      1.05s 90.52%  math/rand.(*Rand).Seed
         0     0%   100%      1.05s 90.52%  math/rand.(*lockedSource).seedPos

Recommend

对标Bert？刷屏的GPT 2.0意味着什么

解析百度Apollo之决策规划模块

关于 Java 中 Runtime.class.getClass() 的细节分析

Using ESLint and Prettier in a TypeScript Project

这份攻略帮你「稳住」反复无常的 GAN

Science：人工智能的发展离不开神经科学，先天结构或是下一个方向

攻防系统之SSH入侵的检测和响应

Formal Verification of Y86-64 Processors

Author Q&A on the Book Business Analysis Agility

分布式系统学习：二阶段提交

About Joyk