問題
求數據流中最頻繁的元素。下面用 Misra-Gries(MG)算法近似求解這個問題。
代碼
Java
import java.util.*;
public class FrequentElement {
public static void main(String[] args) {
int k = 3;//計數器的個數
Random random = new Random();
Map<Integer, Integer> counters = new HashMap();
int i = 0;//這個用於統計總的數量
while (i < 10){//真實的數據流應該是源源不斷的,這裏簡化爲有限個了
Integer element = random.nextInt(5);
if (counters.size()<k){//模擬k個計數器
System.out.println("正在計數");
counters.put(element, counters.getOrDefault(element, 0)+1);
System.out.println(counters);
}else{//k個計數器都用上的時候,進行一輪減1
System.out.println("開始一輪減1");
System.out.println(counters);
//只能用迭代器,不可用foreach,避免ConcurrentModificationException
Iterator<Integer> iterator = counters.keySet().iterator();
while (iterator.hasNext()){
Integer key = iterator.next();
if(counters.get(key)==1) iterator.remove();
else counters.put(key, counters.get(key)-1);
}
System.out.println(counters);
}
i++;
}
int finalSum = 0;
for (Integer key:counters.keySet()) finalSum += counters.get(key);
System.out.println("與精確值最多相差"+(i-finalSum)/(k+1));
}
}
這個有點複雜,看一下模擬的效果
Scala
import scala.util.Random
/** Demo of the Misra-Gries (MG) summary: approximates the most frequent
  * elements of a data stream while keeping at most `k` counters.
  */
object FrequentElement2 {

  /** Whether `element` can be counted directly: it is already tracked or a slot is free. */
  private def canCount(counters: Map[Int, Int], element: Int, k: Int): Boolean =
    counters.contains(element) || counters.size < k

  /** Feeds one stream element into the Misra-Gries summary.
    *
    * If the element is already tracked, or a counter slot is still free, its
    * counter is incremented. Otherwise every counter is decremented by one,
    * counters that reach zero are dropped and the incoming element is discarded.
    *
    * @param counters the current summary (element -> approximate count)
    * @param element  the element just read from the stream
    * @param k        maximum number of counters the summary may hold
    * @return the updated summary; the input map is not modified
    */
  def update(counters: Map[Int, Int], element: Int, k: Int): Map[Int, Int] =
    if (canCount(counters, element, k))
      counters.updated(element, counters.getOrElse(element, 0) + 1)
    else
      counters.collect { case (key, count) if count > 1 => key -> (count - 1) }

  /** Simulates a short random stream, prints the summary after every step and
    * finally prints the maximum possible undercount of any counter.
    */
  def main(args: Array[String]): Unit = {
    val k = 3
    val total = 10 // a real stream is unbounded; it is truncated here for the demo
    val counters = (1 to total).foldLeft(Map.empty[Int, Int]) { (current, _) =>
      val element = Random.nextInt(5)
      if (canCount(current, element, k)) {
        println("正在計數")
      } else {
        // All k counters are in use and the element is untracked: decrement round.
        println("開始一輪減1")
        println(current)
      }
      val next = update(current, element, k)
      println(next)
      next
    }
    println(counters)
    val finalSum = counters.values.sum
    // Each decrement round discards k + 1 occurrences (k counters plus the incoming element),
    // so no counter can be short by more than (total - finalSum) / (k + 1).
    println("與精確值最多相差" + (total - finalSum) / (k + 1))
  }
}
Python
import random
def mg_update(counters, element, k):
    """Feed one stream element into a Misra-Gries summary, in place.

    If ``element`` is already tracked, or fewer than ``k`` counters are in
    use, its counter is incremented. Otherwise every counter is decremented
    by one, counters that reach zero are removed, and ``element`` itself is
    discarded.

    Args:
        counters: dict mapping element -> approximate count (mutated).
        element: the element just read from the stream.
        k: maximum number of counters the summary may hold.

    Returns:
        True if a decrement round was performed, False if the element was
        counted.
    """
    if element in counters or len(counters) < k:
        counters[element] = counters.get(element, 0) + 1
        return False
    # Iterate over a copy of the keys because entries are deleted on the fly.
    for key in list(counters):
        if counters[key] <= 1:
            del counters[key]
        else:
            counters[key] -= 1
    return True


k = 3  # number of counters
counters = {}
i = 0  # total number of elements consumed so far
while i < 10:  # a real stream is unbounded; it is truncated here for the demo
    # randrange(5) draws from 0..4, matching nextInt(5) in the Java/Scala versions
    element = random.randrange(5)
    if mg_update(counters, element, k):
        print("開始一輪減1")
    else:
        print("正在計數")
    print(counters)
    i += 1
finalSum = sum(counters.values())
# Each decrement round discards k + 1 occurrences (k counters plus the incoming
# element), so no counter can be short by more than (i - finalSum) // (k + 1).
print("與精確值最多相差%d" % ((i - finalSum) // (k + 1)))