前言
有一段時間沒有使用Scala
, 我們今天回顧下Scala
的常用的函數算子, 並使用一些練習加深理解和印象. 這些函數對於理解Spark
的相關算子非常有用. 建議自己練習.
所有的練習代碼, 都可以在 https://github.com/SeanYanxml/arsenal/tree/master/arsenal-scala/quick-scala/quick-scala 上找到.
常見集合函數
foreach()
override def foreach[U](f : scala.Function1[A, U]) : scala.Unit = { /* compiled code */ }
遍歷數組&集合.
scala> val lst0 = List(1,2,3,4,5,6)
lst0: List[Int] = List(1, 2, 3, 4, 5, 6)
scala> lst0.foreach(print(_))
123456
map()
final override def map[B, That](f : scala.Function1[A, B])(implicit bf : scala.collection.generic.CanBuildFrom[scala.collection.immutable.List[A], B, That]) : That = { /* compiled code */ }
用於遍歷數組、Map集合、List集合、Tuple元組內的每一個元素. 傳入的是一個函數, 該函數會作用於每一個元素, 並返回由各個結果組成的新集合.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
scala> val lst1 = lst0.map(_*10)
lst1: List[Int] = List(10, 70, 90, 80, 0, 30, 50, 40, 60, 20)
- Tips -
foreach()
&map()
foreach() 與 map() 方法的區別在於: foreach() 無返回值(返回 Unit), 而 map() 有返回值(返回一個新的集合).
scala> val lst0 = List(1,2,3,4,5)
lst0: List[Int] = List(1, 2, 3, 4, 5)
scala> val lst1 = lst0.foreach(_*10)
lst1: Unit = ()
# 無輸出
scala> lst1
scala> val lst2 = lst0.map(_*10)
lst2: List[Int] = List(10, 20, 30, 40, 50)
# 含有輸出
scala> lst2
res2: List[Int] = List(10, 20, 30, 40, 50)
map().flatten
/flatMap()
def flatten[B](implicit asTraversable : scala.Function1[A, scala.collection.GenTraversableOnce[B]]) : CC[B] = { /* compiled code */ }
final override def flatMap[B, That](f : scala.Function1[A, scala.collection.GenTraversableOnce[B]])(implicit bf : scala.collection.generic.CanBuildFrom[scala.collection.immutable.List[A], B, That]) : That = { /* compiled code */ }
先進行map()
函數操作, 再將其進行壓平.
scala> val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
lines: List[String] = List(hello tom hello jerry, hello jerry, hello kitty)
# 獲得裏面是字符串數組的List對象
scala> val linesSplit = lines.map(_.split(" "))
linesSplit: List[Array[String]] = List(Array(hello, tom, hello, jerry), Array(hello, jerry), Array(hello, kitty))
# 將所有的數組都壓平,壓在一個List內部
scala> val linesSplitFlat = lines.map(_.split(" ")).flatten
linesSplitFlat: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty)
# 使用flatMap
scala> val linesSplitFlat2 = lines.flatMap(_.split(" "))
linesSplitFlat2: List[String] = List(hello, tom, hello, jerry, hello, jerry, hello, kitty)
filter()
def filter(p : scala.Function1[A, scala.Boolean]) : Repr = { /* compiled code */ }
過濾, 過濾出數組或集合
內滿足篩選條件的數據.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
scala> val list3 = lst0.filter(_>5)
list3: List[Int] = List(7, 9, 8, 6)
sorted
/sortBy()
/sortWith()
- def sorted[B >: A](implicit ord : scala.math.Ordering[B]) : Repr = { /* compiled code */ }
- def sortBy[B](f : scala.Function1[A, B])(implicit ord : scala.math.Ordering[B]) : Repr = { /* compiled code */ }
- def sortWith(lt : scala.Function2[A, A, scala.Boolean]) : Repr = { /* compiled code */ }
三個函數都可以用於排序. 其中sorted
是簡單排序, sortBy
可以指定以某一個屬性進行排序, sortWith
可以指定排序的比較函數.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
scala> val lst3_1 = lst0.sorted
lst3_1: List[Int] = List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
scala> val lst3_2 = lst0.sortBy(x =>x)
lst3_2: List[Int] = List(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
scala> val lst3_2 = lst0.sortBy(x => (-x))
lst3_2: List[Int] = List(9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
# 傳入一個compare()函數
scala> val lst3_3 = lst0.sortWith((x,y) => (x>y))
lst3_3: List[Int] = List(9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
reverse
override def reverse : scala.collection.immutable.List[A] = { /* compiled code */ }
反轉.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
scala> val lst4 = lst0.reverse
lst4: List[Int] = List(2, 6, 4, 5, 3, 0, 8, 9, 7, 1)
grouped()
/groupBy()
- def grouped(size : scala.Int) : scala.collection.Iterator[Repr] = { /* compiled code */ }
- def groupBy[K](f : scala.Function1[A, K]) : scala.collection.immutable.Map[K, Repr] = { /* compiled code */ }
grouped()
是將元素按指定個數分組, 返回的是一個Iterator[List[A]]
的迭代器(可用toList轉換成List[List[A]]);
groupBy()
指定分類的函數, 返回的是一個Map[K, List[A]]
的Map.
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2)
#grouped()
scala> val lst5 = lst0.grouped(4)
lst5: Iterator[List[Int]] = non-empty iterator
scala> lst5.toList
res1: List[List[Int]] = List(List(1, 7, 9, 8), List(0, 3, 5, 4), List(6, 2))
# groupBy()
scala> val lst0 = List(1,7,9,8,0,3,5,4,6,2,3,2,3)
lst0: List[Int] = List(1, 7, 9, 8, 0, 3, 5, 4, 6, 2, 3, 2, 3)
scala> val lst8 = lst0.groupBy(x => (x))
lst8: scala.collection.immutable.Map[Int,List[Int]] = Map(0 -> List(0), 5 -> List(5), 1 -> List(1), 6 -> List(6), 9 -> List(9), 2 -> List(2, 2), 7 -> List(7), 3 -> List(3, 3, 3), 8 -> List(8), 4 -> List(4))
scala> val lst8 = lst0.groupBy(x => (x)).toList
lst8: List[(Int, List[Int])] = List((0,List(0)), (5,List(5)), (1,List(1)), (6,List(6)), (9,List(9)), (2,List(2, 2)), (7,List(7)), (3,List(3, 3, 3)), (8,List(8)), (4,List(4)))
scala> val lst8 = lst0.groupBy(x => (x%2==1)).toList
lst8: List[(Boolean, List[Int])] = List((false,List(8, 0, 4, 6, 2, 2)), (true,List(1, 7, 9, 3, 5, 3, 3)))
reduce()
def reduce[A1 >: A](op : scala.Function2[A1, A1, A1]) : A1 = { /* compiled code */ }
化簡(歸約)計算: 使用二元函數將集合內所有元素合併爲一個值. 配合 .par 可以並行化計算.
scala> val array = Array(1,2,3,4,5,6,7)
array: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7)
scala> val sum = array.reduce(_+_)
sum: Int = 28
scala> array.par.reduce(_+_)
res3: Int = 28
fold()
def fold[U >: T](z : U)(op : scala.Function2[U, U, U]) : U = { /* compiled code */ }
scala> val array = Array(1,2,3,4,5,6,7)
array: Array[Int] = Array(1, 2, 3, 4, 5, 6, 7)
scala> array.fold(10)(_+_)
res4: Int = 38
scala> array.par.fold(10)(_+_)
res5: Int = 98
scala> array.par.fold(0)(_+_)
res6: Int = 28
aggregate()
def aggregate[B](z : => B)(seqop : scala.Function2[B, A, B], combop : scala.Function2[B, B, B]) : B = { /* compiled code */ }
聚合函數.
scala> val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
arr: List[List[Int]] = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
scala> arr.aggregate(0)(_+_.sum, _+_)
res7: Int = 20
- 交集、並集、差集 (
intersect
/union
/diff
)
- def intersect[B >: A](that : scala.collection.GenSeq[B]) : Repr = { /* compiled code */ }
- override def union[B >: A, That](that : scala.collection.GenSeq[B])(implicit bf : scala.collection.generic.CanBuildFrom[Repr, B, That]) : That = { /* compiled code */ }
- def diff[B >: A](that : scala.collection.GenSeq[B]) : Repr = { /* compiled code */ }
scala> val l1 = List(5,6,4,7)
l1: List[Int] = List(5, 6, 4, 7)
scala> val l2 = List(1,2,3,4)
l2: List[Int] = List(1, 2, 3, 4)
# 並集
scala> val unionL1 = l1.union(l2)
unionL1: List[Int] = List(5, 6, 4, 7, 1, 2, 3, 4)
# 交集
scala> val insercetionL1 = l1.intersect(l2)
insercetionL1: List[Int] = List(4)
# 差集
scala> val differenceL1 = l1.diff(l2)
differenceL1: List[Int] = List(5, 6, 7)
其他函數
split()
public String[] split(String regex) {
return split(regex, 0);
}
分割字符串, 返回一個字符串數組.
scala> val str1 = "a b c d e"
str1: String = a b c d e
scala> val strArray1 = str1.split(" ")
strArray1: Array[String] = Array(a, b, c, d, e)
scala> strArray1
res4: Array[String] = Array(a, b, c, d, e)
練習
Test1
- 創建一個List
val lst0 = List(1,7,9,8,0,3,5,4,6,2)
- 將lst0中每個元素乘以10後生成一個新的集合
- 將lst0中的偶數取出來生成一個新的集合
- 將lst0排序後生成一個新的集合
- 反轉順序
- 將lst0中的元素4個一組,類型爲Iterator[List[Int]]
- 將Iterator轉換成List
- 將多個list壓扁成一個List
Test2
val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
- 先按空格切分,再壓平
- 計算WordCount
Test3
val array = Array(1,2,3,4,5,6,7)
- 並行計算求和(reduce / fold)
Test4
val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
- 聚合計算
Test5
val l1 = List(5,6,4,7)
val l2 = List(1,2,3,4)
- 求差集、交集、並集
相關練習與結果
package com.yanxml.quick_scala.basic.train
import org.junit.Test
/**
 * Quick drills for the basic Scala collection operations.
 *
 * The single test method walks through map / filter / sorting / grouping,
 * a word count, reduce / fold / aggregate, and the set-like operations on
 * lists. Most intermediate results are bound to vals purely so they can be
 * inspected; nothing is asserted.
 *
 * Date: 2019-04-24
 */
class QuickBasicTrain {

  /**
   * Runs every exercise in sequence.
   *
   * The only observable output is what the `println` calls in the
   * reduce section write to standard output.
   */
  @Test
  def train(): Unit = {
    // ---- Test1: basic List transformations ----

    // Create a List.
    val lst0 = List(1,7,9,8,0,3,5,4,6,2)
    // Multiply every element by 10, producing a new collection.
    val lst1 = lst0.map(_*10)
    // Keep only the even elements.
    val lst2 = lst0.filter(_%2==0)
    // Sort into a new collection: natural order, by key, and by comparator
    // (the comparator below sorts in descending order).
    val lst3_1 = lst0.sorted
    val lst3_2 = lst0.sortBy(x =>x)
    val lst3_3 = lst0.sortWith((x,y) => (x>y))
    // Reverse the order.
    val lst4 = lst0.reverse
    // Split into groups of 4; the result type is Iterator[List[Int]].
    val lst5 = lst0.grouped(4)
    // Convert the Iterator into a List.
    val lst6 = lst0.grouped(4).toList
    // Flatten the nested lists back into a single List.
    val lst7 = lst0.grouped(4).toList.flatten
    // Partition by a key function: odd (true) vs. even (false).
    val lst8 = lst0.groupBy(x => (x%2==1))

    // ---- Test2: word count ----

    val lines = List("hello tom hello jerry", "hello jerry", "hello kitty")
    // Split on spaces, then flatten (map + flatten is equivalent to flatMap).
    val linesSplitFlat = lines.map(_.split(" ")).flatten
    val linesSplitFlat2 = lines.flatMap(_.split(" "))
    // Pair every word with a count of 1.
    val linesCountOfOne = linesSplitFlat.map((_,1))
    // Group the pairs by word: word -> List((word,1), (word,1), ...).
    val linesGroupBy = linesCountOfOne.groupBy(_._1)
    // Counter-example noted by the author: linesGroupBy.map(_._1,_._2.size)
    // Name the tuple parameter instead:
    val linesCount = linesGroupBy.map(t => (t._1, t._2.size))
    // A Map cannot be sorted directly, so convert it to a List first.
    val linesCountSorted = linesCount.toList.sortBy(_._2)
    // The whole word count as a single expression, in two variants:
    // counting the group size, and summing the 1s with foldLeft.
    val linesCountAllOne = lines.map(_.split(" ")).flatten.map((_,1)).groupBy(_._1).map(t => (t._1,t._2.size))
    val linesCountAllOne2 = lines.map(_.split(" ")).flatten.map((_,1)).groupBy(_._1).mapValues(_.foldLeft(0)(_+_._2))

    // ---- Test3: summation with reduce / fold ----

    val array = Array(1,2,3,4,5,6,7)
    // map also visits every element, but it builds a result array
    // (discarded here); foreach is the side-effect-only form.
    array.map(println)
    array.map(x=> println(x+1))
    array.foreach(println)
    // On a sequential collection reduce behaves like reduceLeft:
    // (((((1+2)+3)+4)+5)+6)+7
    val sum = array.reduce(_+_)
    // Switch to the parallel collection API.
    array.par.reduce(_+_)
    // Author's notes: reduce applies a binary operation to all elements in
    // no particular order; there are also variants with a specific order.

    // fold: like reduce but with an initial value (no particular order).
    // It is curried: the initial value goes in the first parameter list.
    array.fold(10)(_+_)
    // In the parallel version the initial value can be folded in more than
    // once, so with a non-neutral seed such as 10 the result may differ from
    // the sequential one; a neutral seed (0 for addition) avoids that.
    array.par.fold(10)(_+_)
    array.par.fold(0)(_+_)
    // Author's note: fold with an initial value in a specific order
    // (not exercised here).

    // ---- Test4: aggregate ----

    val arr = List(List(1, 2, 3), List(3, 4, 5), List(2), List(0))
    // aggregate(<initial value>)(<function 1>, <function 2>):
    // function 1 adds each inner list's sum to the accumulator,
    // function 2 combines two accumulators.
    arr.aggregate(0)(_+_.sum, _+_)

    // ---- Test5: union / intersection / difference ----

    val l1 = List(5,6,4,7)
    val l2 = List(1,2,3,4)
    // Union.
    val unionL1 = l1.union(l2)
    // Intersection.
    val insercetionL1 = l1.intersect(l2)
    // Difference.
    val differenceL1 = l1.diff(l2)
  }
}
後記
雖然, 有些函數的定義還不能像Java
一樣完全看懂, 但是記錄於此. 多看幾次.
此外, 有許多的函數暫沒有列舉出來. 後續再進行補充.