map篇
在C語言時代,一個map函數可能長成下面這個樣子
// mapF applies f to every element of i and returns the results in a freshly
// allocated slice with the same length and capacity as i. The input slice is
// not modified.
func mapF(f IntMapFunc, i ...int) []int {
	out := make([]int, len(i), cap(i))
	for idx, v := range i {
		out[idx] = f(v)
	}
	return out
}
// mapFInplace overwrites every element of i with f(element) and returns i
// itself, so the caller's backing array is modified.
func mapFInplace(f IntMapFunc, i ...int) []int {
	for idx, v := range i {
		i[idx] = f(v)
	}
	return i
}
看起來不錯,但這個函數只適用於int,如果是int8,那麼golang強類型的坑會讓你抓瞎。不得不又複製一份。最終得到的結果是這會讓所有的程序設計者如鯁在喉。
如果不考慮效率?你能想到的最優美的寫法是什麼?
如果只用reflect包,那麼會是下面這個樣子
import "reflect"
// MapR builds a reflection-based map function around fi. The returned
// function treats in[0] as a slice, calls fi once per element through
// reflect, and returns a single-element result holding a new slice of the
// same type, length and capacity that contains the mapped values.
func MapR(fi interface{}) functional.Function {
	fn := reflect.ValueOf(fi)
	return func(in []reflect.Value) []reflect.Value {
		src := in[0]
		dst := reflect.MakeSlice(src.Type(), src.Len(), src.Cap())
		// Elements are visited from last to first.
		for idx := src.Len() - 1; idx >= 0; idx-- {
			// in is reused as the argument list for fn, so in[0] is
			// overwritten with the current element on every iteration.
			in[0] = src.Index(idx)
			dst.Index(idx).Set(fn.Call(in)[0])
		}
		return []reflect.Value{dst}
	}
}
// MapRInplace builds a reflection-based in-place map function around fi. The
// returned function treats in[0] as a slice, replaces each element with the
// result of calling fi on it, and returns in with in[0] set back to that
// same slice.
func MapRInplace(fi interface{}) functional.Function {
	fn := reflect.ValueOf(fi)
	return func(in []reflect.Value) []reflect.Value {
		target := in[0]
		// Elements are visited from last to first.
		for idx := target.Len() - 1; idx >= 0; idx-- {
			elem := target.Index(idx)
			// in doubles as the argument list for fn.
			in[0] = elem
			elem.Set(fn.Call(in)[0])
		}
		// Restore the slice so it becomes the returned value.
		in[0] = target
		return in
	}
}
在golang中,一個function的`interface{}`形式是`type Function = func(in []reflect.Value) []reflect.Value`。如果一個變量類型是`Function`,那麼它可以被綁定到一個地址上。我們使用下面的膠水函數,完成這兩個函數的全部封裝。
func Map(fi, fm interface{}) {
functional.MakeFunc(MapR(fi), fm)
return
}
func MapInplace(fi, fm interface{}) {
functional.MakeFunc(MapRInplace(fi), fm)
return
}
我們對這兩個函數做一個測試。
// BenchmarkMap measures the reflection-based, copying map produced by Map on
// a slice of 100000 ints, feeding each result back in as the next input.
func BenchmarkMap(b *testing.B) {
	var mapped func(i ...int) []int
	Map(func(i int) int { return i + 1 }, &mapped)
	data := make([]int, 100000)
	for n := 0; n < b.N; n++ {
		data = mapped(data...)
	}
}
func BenchmarkMapInplace(b *testing.B) {
var x func(i ...int) []int
Map(func(i int) int { return i + 1 }, &x)
var s = make([]int, 100000)
for i := 0; i < b.N; i++ {
x(s...)
}
}
// BenchmarkMap100000Raw measures the hand-written copying mapF with add1 on
// a slice of 100000 ints.
func BenchmarkMap100000Raw(b *testing.B) {
	data := make([]int, 100000)
	for n := 0; n < b.N; n++ {
		data = mapF(add1, data...)
	}
}
// BenchmarkMap100000RawInplace measures the hand-written in-place
// mapFInplace with add1 on a slice of 100000 ints.
func BenchmarkMap100000RawInplace(b *testing.B) {
	data := make([]int, 100000)
	for n := 0; n < b.N; n++ {
		data = mapFInplace(add1, data...)
	}
}
=== RUN TestMap
[2 3]
[3 4]
--- PASS: TestMap (0.00s)
BenchmarkMap-12 50 24199896 ns/op
BenchmarkMapInplace-12 50 24840064 ns/op
BenchmarkMap100000Raw-12 5000 319601 ns/op
BenchmarkMap100000RawInplace-12 10000 226000 ns/op
雖然特別優美,對於任意函數`func(T) T`,都能綁定到一個函數指針`func(...T) []T`或者`func([]T) []T`上,並且它也實現了功能,但這也太慢了!比非通用的函數要慢100倍,也就是說這兩個函數如果使用到工程中,99%的時間會用在reflect上,這能玩?
注意到runtime-call是一個大量消耗cpu的點。我們使用接口對這種特性進行封裝。
// Handler is the per-element callback used by the mapping helpers in place
// of a reflect call for every element.
type Handler interface {
	// Call is invoked once per index with the user function f. The
	// IntHandler implementation asserts f to func(int) int, applies it to
	// the element at index and stores the result back at that position.
	Call(f interface{}, index int)
}
type Mapper struct {
F func(slice interface{}) Handler
FInplace(slice interface{}) Handler
}
// MapperF abstracts how a slice (passed as interface{}) is turned into a
// Handler.
type MapperF interface {
	// F returns the Handler used by the copying map. MapperTraits.F, for
	// example, copies the input into a newly made slice first.
	F(slice interface{}) Handler
	// FInplace returns the Handler used by the in-place map.
	// MapperTraits.FInplace, for example, converts the input slice to the
	// handler type without making a new slice.
	FInplace(slice interface{}) Handler
}
// Mapper combines a MapperF with the worker count used when mapping.
type Mapper struct {
	// MapperF supplies F and FInplace. It is an embedded interface, so it
	// is nil until a value is assigned.
	MapperF
	// CoreCount is passed to MapSlice as the worker count. MapR and
	// MapRInplace treat a zero value as 1.
	CoreCount int
}
// MapR returns a function that maps fi over the slice in in[0] through the
// Handler produced by m.F, and returns the handler converted to a slice of
// fi's first parameter type.
//
// fi must be a function with at least one parameter: reflect.TypeOf(fi).In(0)
// panics otherwise.
func (m Mapper) MapR(fi interface{}) functional.Function {
	// Result type: a slice of fi's first parameter type.
	t := reflect.SliceOf(reflect.TypeOf(fi).In(0))
	return func(in []reflect.Value) (out []reflect.Value) {
		slice := in[0]
		sliceMap := m.F(slice.Interface())
		// Elements are visited from last to first.
		for i := slice.Len() - 1; i >= 0; i-- {
			sliceMap.Call(fi, i)
		}
		// Convert already yields a reflect.Value. reflect.Value has no
		// Value method, so the former trailing .Value() did not compile.
		return append(out, reflect.ValueOf(sliceMap).Convert(t))
	}
}
// MapRInplace returns a function that maps fi over the slice in in[0]
// through the Handler produced by m.FInplace, and returns the handler
// converted to a slice of fi's first parameter type.
//
// This method was previously declared as a second MapR, which clashed with
// the copying variant; it is the FInplace counterpart and is named to match.
//
// fi must be a function with at least one parameter: reflect.TypeOf(fi).In(0)
// panics otherwise.
func (m Mapper) MapRInplace(fi interface{}) functional.Function {
	// Result type: a slice of fi's first parameter type.
	t := reflect.SliceOf(reflect.TypeOf(fi).In(0))
	return func(in []reflect.Value) (out []reflect.Value) {
		slice := in[0]
		sliceMap := m.FInplace(slice.Interface())
		// Elements are visited from last to first.
		for i := slice.Len() - 1; i >= 0; i-- {
			sliceMap.Call(fi, i)
		}
		// Convert already yields a reflect.Value. reflect.Value has no
		// Value method, so the former trailing .Value() did not compile.
		return append(out, reflect.ValueOf(sliceMap).Convert(t))
	}
}
注意到for循環彼此無關,我們再把for/Handler提出來。
import "sync"
// mapSlice calls handler.Call(f, i) for every index i in the half-open range
// [l, r), in ascending order. It does nothing when l >= r.
func mapSlice(handler Handler, l, r int, f interface{}) {
	idx := l
	for idx < r {
		handler.Call(f, idx)
		idx++
	}
}
// MapSlice calls handler.Call(f, i) for every index i in [l, r) and returns
// handler.
//
// When coreCount is at most 1, or the range is empty, the work is done
// serially on the calling goroutine. Otherwise the range is split into
// contiguous chunks of ceil((r-l)/coreCount) indices and each chunk is
// processed by its own goroutine; MapSlice returns once all of them have
// finished. handler.Call must therefore tolerate concurrent calls with
// distinct indices.
func MapSlice(handler Handler, l, r, coreCount int, f interface{}) Handler {
	// Check coreCount before dividing by it: a zero worker count used to
	// panic with a division by zero when step was computed first.
	if coreCount <= 1 || r <= l {
		mapSlice(handler, l, r, f)
		return handler
	}

	step := (r - l + coreCount - 1) / coreCount
	var wg sync.WaitGroup
	for i := l; i < r; i += step {
		end := i + step
		if end > r {
			end = r
		}
		// Count each goroutine as it is launched. Rounding step up can
		// yield fewer chunks than coreCount (e.g. 5 items on 4 workers is
		// 3 chunks), and a fixed wg.Add(coreCount) then blocks Wait forever.
		wg.Add(1)
		go func(lo, hi int) {
			defer wg.Done()
			mapSlice(handler, lo, hi, f)
		}(i, end)
	}
	wg.Wait()
	return handler
}
對`Mapper`略微修改
// MapR returns a function that maps fi over the slice in in[0]. The slice is
// wrapped by m.F, processed by MapSlice with m.CoreCount workers (a zero
// CoreCount counts as 1), and the resulting handler is converted to a slice
// of fi's first parameter type and stored back into in[0].
func (m Mapper) MapR(fi interface{}) functional.Function {
	workers := m.CoreCount
	if workers == 0 {
		workers = 1
	}
	sliceType := reflect.SliceOf(reflect.TypeOf(fi).In(0))
	return func(in []reflect.Value) []reflect.Value {
		src := in[0]
		mapped := MapSlice(m.F(src.Interface()), 0, src.Len(), workers, fi)
		in[0] = reflect.ValueOf(mapped).Convert(sliceType)
		return in
	}
}
// MapRInplace returns a function that maps fi over the slice in in[0]. The
// slice is wrapped by m.FInplace, processed by MapSlice with m.CoreCount
// workers (a zero CoreCount counts as 1), and the resulting handler is
// converted to a slice of fi's first parameter type and stored back into
// in[0].
func (m Mapper) MapRInplace(fi interface{}) functional.Function {
	workers := m.CoreCount
	if workers == 0 {
		workers = 1
	}
	sliceType := reflect.SliceOf(reflect.TypeOf(fi).In(0))
	return func(in []reflect.Value) []reflect.Value {
		src := in[0]
		mapped := MapSlice(m.FInplace(src.Interface()), 0, src.Len(), workers, fi)
		in[0] = reflect.ValueOf(mapped).Convert(sliceType)
		return in
	}
}
因爲Mapper的特性被提出爲MapperF,這時如果MapperF沒有被初始化,將會導致錯誤。因此我們再做一個自動反向注入的Mapper類。
// MapperTraits is a Mapper that can act as its own MapperF: its MapR and
// MapRInplace methods assign the receiver to the embedded MapperF when that
// field is still nil.
type MapperTraits struct {
	// Mapper carries the MapperF and CoreCount settings.
	Mapper
	// BaseTraitsInterface is created by functional.NewBaseTraits from the
	// handler sample; presumably it provides the GetTypeInfo used by F and
	// FInplace (verify against the functional package).
	functional.BaseTraitsInterface
}
// NewMapperTraits creates a MapperTraits whose BaseTraitsInterface is built
// from handler by functional.NewBaseTraits. Any int found in options is
// stored as CoreCount (the last one wins); options of other types are
// ignored.
func NewMapperTraits(handler interface{}, options ...interface{}) MapperTraits {
	traits := MapperTraits{BaseTraitsInterface: functional.NewBaseTraits(handler)}
	for _, opt := range options {
		if cores, ok := opt.(int); ok {
			traits.CoreCount = cores
		}
	}
	return traits
}
// F copies sliceI into a newly made slice of type m.GetTypeInfo() with the
// same length and capacity, and returns that copy as a Handler. It panics if
// the new slice does not implement Handler.
func (m MapperTraits) F(sliceI interface{}) Handler {
	src := reflect.ValueOf(sliceI)
	dst := reflect.MakeSlice(m.GetTypeInfo(), src.Len(), src.Cap())
	reflect.Copy(dst, src)
	h := dst.Interface().(Handler)
	return h
}
// FInplace converts slice to the type reported by m.GetTypeInfo() and
// returns it as a Handler, without making a new slice. It panics if the
// converted value does not implement Handler.
func (m MapperTraits) FInplace(slice interface{}) Handler {
	converted := reflect.ValueOf(slice).Convert(m.GetTypeInfo())
	return converted.Interface().(Handler)
}
// MapR delegates to the embedded Mapper.MapR. If no MapperF has been set, the
// receiver itself is installed as the MapperF first, so that F and FInplace
// resolve to the MapperTraits implementations.
func (m MapperTraits) MapR(fi interface{}) functional.Function {
	if m.MapperF != nil {
		return m.Mapper.MapR(fi)
	}
	m.MapperF = m
	return m.Mapper.MapR(fi)
}
// Map builds the copying map function for fi with m.MapR and hands it,
// together with fm, to functional.MakeFunc.
func (m MapperTraits) Map(fi, fm interface{}) {
	mapper := m.MapR(fi)
	functional.MakeFunc(mapper, fm)
}
// MapRInplace delegates to the embedded Mapper.MapRInplace. If no MapperF has
// been set, the receiver itself is installed as the MapperF first, so that F
// and FInplace resolve to the MapperTraits implementations.
func (m MapperTraits) MapRInplace(fi interface{}) functional.Function {
	if m.MapperF != nil {
		return m.Mapper.MapRInplace(fi)
	}
	m.MapperF = m
	return m.Mapper.MapRInplace(fi)
}
// MapInplace builds the in-place map function for fi with m.MapRInplace and
// hands it, together with fm, to functional.MakeFunc.
func (m MapperTraits) MapInplace(fi, fm interface{}) {
	mapper := m.MapRInplace(fi)
	functional.MakeFunc(mapper, fm)
}
測試如下
// TestMap binds add1 through a MapperTraits twice, first with the default
// worker count and then with a worker count of 2, applies the bound function
// to the running slice each time and prints the result.
func TestMap(t *testing.T) {
	var bound func(i ...int) []int
	values := []int{1, 2}

	serial := NewMapperTraits(IntHandler{})
	serial.Map(add1, &bound)
	values = bound(values...)
	fmt.Println(values)

	parallel := NewMapperTraits(IntHandler{}, 2)
	parallel.Map(add1, &bound)
	values = bound(values...)
	fmt.Println(values)
}
// BenchmarkMapRaw measures the hand-written copying mapF with add1 on a
// slice of factor ints.
func BenchmarkMapRaw(b *testing.B) {
	data := make([]int, factor)
	for n := 0; n < b.N; n++ {
		data = mapF(add1, data...)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapRawInplace measures the hand-written in-place mapFInplace with
// add1 on a slice of factor ints.
func BenchmarkMapRawInplace(b *testing.B) {
	data := make([]int, factor)
	for n := 0; n < b.N; n++ {
		data = mapFInplace(add1, data...)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapper measures the copying map built from IntMapper{}.MapR on a
// slice of factor ints.
func BenchmarkMapper(b *testing.B) {
	var mapped func(i ...int) []int
	inc := func(i int) int { return i + 1 }
	functional.MakeFunc(IntMapper{}.MapR(inc), &mapped)
	data := make([]int, factor)
	for n := 0; n < b.N; n++ {
		data = mapped(data...)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapperTraits measures the copying map built from a MapperTraits
// with the default worker count on a slice of factor ints.
func BenchmarkMapperTraits(b *testing.B) {
	var mapped func(i ...int) []int
	inc := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}).MapR(inc), &mapped)
	data := make([]int, factor)
	for n := 0; n < b.N; n++ {
		data = mapped(data...)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapperInplaceTraits measures the in-place map built from a
// MapperTraits with the default worker count on a slice of factor ints.
func BenchmarkMapperInplaceTraits(b *testing.B) {
	var mapped func(i ...int) []int
	inc := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}).MapRInplace(inc), &mapped)
	data := make([]int, factor)
	for n := 0; n < b.N; n++ {
		data = mapped(data...)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapperTraits8 measures the copying map built from a MapperTraits
// with a worker count of 8 on a slice of factor ints.
func BenchmarkMapperTraits8(b *testing.B) {
	var mapped func(i ...int) []int
	inc := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}, 8).MapR(inc), &mapped)
	data := make([]int, factor)
	for n := 0; n < b.N; n++ {
		data = mapped(data...)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapperInplaceTraits4 measures the in-place map built from a
// MapperTraits with a worker count of 4 on a slice of factor ints.
func BenchmarkMapperInplaceTraits4(b *testing.B) {
	var mapped func(i ...int) []int
	inc := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}, 4).MapRInplace(inc), &mapped)
	data := make([]int, factor)
	for n := 0; n < b.N; n++ {
		data = mapped(data...)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapperInplaceTraits8 measures the in-place map built from a
// MapperTraits with a worker count of 8 on a slice of factor ints.
func BenchmarkMapperInplaceTraits8(b *testing.B) {
	var mapped func(i ...int) []int
	inc := func(i int) int { return i + 1 }
	functional.MakeFunc(NewMapperTraits(IntHandler{}, 8).MapRInplace(inc), &mapped)
	data := make([]int, factor)
	for n := 0; n < b.N; n++ {
		data = mapped(data...)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapRaw4 measures MapSlice called directly on an IntHandler with a
// worker count of 4 over factor elements, bypassing reflect.
func BenchmarkMapRaw4(b *testing.B) {
	data := make([]int, factor)
	inc := func(a int) int { return a + 1 }
	for n := 0; n < b.N; n++ {
		data = MapSlice(IntHandler(data), 0, factor, 4, inc).(IntHandler)
	}
	//fmt.Println("|", data[0], "|")
}
// BenchmarkMapRaw8 measures MapSlice called directly on an IntHandler with a
// worker count of 8 over factor elements, bypassing reflect.
func BenchmarkMapRaw8(b *testing.B) {
	data := make([]int, factor)
	inc := func(a int) int { return a + 1 }
	for n := 0; n < b.N; n++ {
		data = MapSlice(IntHandler(data), 0, factor, 8, inc).(IntHandler)
	}
	//fmt.Println("|", data[0], "|")
}
=== RUN TestMap
[2 3]
[3 4]
--- PASS: TestMap (0.00s)
goos: windows
goarch: amd64
pkg: github.com/Myriad-Dreamin/functional-go/mr
// 100000000
BenchmarkMapRaw-12 3 371668433 ns/op
BenchmarkMapRawInplace-12 5 240799760 ns/op
BenchmarkMapper-12 2 628998950 ns/op
BenchmarkMapperTraits-12 2 623999250 ns/op
BenchmarkMapperInplaceTraits-12 2 513511500 ns/op
BenchmarkMapperTraits8-12 5 281000660 ns/op
BenchmarkMapperInplaceTraits4-12 10 158599680 ns/op
BenchmarkMapperInplaceTraits8-12 10 126300010 ns/op
BenchmarkMapRaw4-12 10 147497270 ns/op
BenchmarkMapRaw8-12 10 126496710 ns/op
// 100000
BenchmarkMap-12 50 24939844 ns/op
BenchmarkMapInplace-12 50 24620058 ns/op
測試結果是非原地Mapper的耗時爲0.28s/1e8次運算,與不使用此方法的Map幾乎相同。不錯,這樣的map函數應該已經恰到好處了。
Conclusion
最終我們得到了什麼。
如果對效率需求不高,那麼可以使用`Map`直接生成Map函數。如果對效率的需求略高,只需要寫一個Slice的配接器。在此例中,爲:
// IntHandler adapts a []int so it can be driven element by element through
// its Call method.
type IntHandler []int

// Call replaces the element at index with the result of applying f to it.
// f must be a func(int) int; any other dynamic type makes the type assertion
// panic.
func (handler IntHandler) Call(f interface{}, index int) {
	fn := f.(func(int) int)
	handler[index] = fn(handler[index])
}
如果你對效率的需求真的特別特別高,那麼大可重新幹回老本行,複製粘貼文本替換。。
// r returns a new slice, with the same length and capacity as i, in which
// every element is the corresponding element of i plus one. The input is not
// modified.
func r(i ...int) []int {
	out := make([]int, len(i), cap(i))
	for idx, v := range i {
		out[idx] = v + 1
	}
	return out
}
// rInplace adds one to every element of i in place and returns i itself.
func rInplace(i ...int) []int {
	for idx := range i {
		i[idx]++
	}
	return i
}
對於這個耦合度極高的Map函數而言,效率如下:
BenchmarkMapRaw-12 10 166700110 ns/op
BenchmarkMapRawInplace-12 20 85599920 ns/op
換來的只不過是要複製粘貼很多次,失去了封裝的好處!
順便說一下,在同等情況下c語言的運算力爲0.6s/1e8次。果然是時代變了,大人。
Reduce篇
留作課後習題
Reference
所有的代碼見github