Java Stream Map和flatmap及集合處理

編程中多數要對集合進行各種操作,獲取Map、Set、List等。
Map

比如一個對象List,獲取所有人的名字集合

    /**
     * Projects a list of people onto their names with {@code Stream.map}
     * and checks that an expected name is present in the result.
     */
    @Test
    public void should_can_get_name_map() {
        List<PersonInfo> people = Lists.newArrayList(
                new PersonInfo("Kaka", 22),
                new PersonInfo("Hustzw", 24));

        // map() transforms every element; here each PersonInfo becomes its name.
        List<String> names = people.stream()
                .map(person -> person.getName())
                .collect(Collectors.toList());

        assertThat(names).contains("Kaka");
    }

HashMap

構建一個屬性和其本身的映射,比如根據人名找到人。

    /**
     * Builds name-to-person lookup maps with {@code Collectors.toMap}, once with a
     * lambda value mapper and once with {@code Function.identity()}.
     */
    @Test
    public void should_can_get_name_info_map() {
        List<PersonInfo> people = Lists.newArrayList(
                new PersonInfo("Kaka", 22),
                new PersonInfo("Hustzw", 24));

        // Note: keys must be unique here; the two-argument toMap fails on a duplicate key.
        Map<String, PersonInfo> byNameViaLambda = people.stream()
                .collect(Collectors.toMap(PersonInfo::getName, person -> person));
        // Function.identity() has the same effect as the lambda above.
        Map<String, PersonInfo> byNameViaIdentity = people.stream()
                .collect(Collectors.toMap(PersonInfo::getName, Function.identity()));

        assertThat(byNameViaLambda).containsKeys("Kaka", "Hustzw");
        assertThat(byNameViaIdentity).containsKeys("Kaka", "Hustzw");
    }

Set

同樣是獲取集合

    /**
     * Filters the adults (age 18 or above) out of a list and collects them into a
     * {@code Set} with {@code Collectors.toSet}.
     */
    @Test
    public void should_can_get_set() {
        PersonInfo kaka = new PersonInfo("Kaka", 22);
        PersonInfo hustzw = new PersonInfo("Hustzw", 24);

        List<PersonInfo> personInfos = Lists.newArrayList(kaka, hustzw);

        // Unlike the two-argument toMap, toSet does not fail on duplicates:
        // equal elements simply collapse into a single entry.
        Set<PersonInfo> adultsSet = personInfos.stream()
                .filter(x -> x.getAge() >= 18)
                .collect(Collectors.toSet());

        // contains() ignores order, so one assertion covers both people.
        assertThat(adultsSet).contains(kaka, hustzw);
    }

flatmap

Java8的Stream中提供的flatMap同樣用於做映射(map)轉換,區別在於它會把每個元素映射出的多個結果打平成一個Stream。看個例子:

有兩個List,想對所有元素按年齡分類。如果利用循環可能需要兩層循環。

利用 flatMap 打平後,很容易對其操作。

代碼:

    /**
     * Flattens a list of lists with {@code flatMap} and groups the people by age,
     * once keeping duplicates and once after {@code distinct()}.
     */
    @Test
    public void should_can_get_name_info_flat_map() {
        PersonInfo kaka = new PersonInfo("Kaka", 22);
        PersonInfo hustzw = new PersonInfo("Hustzw", 22);
        PersonInfo zhangSan = new PersonInfo("ZhangSan", 14);
        PersonInfo lisi = new PersonInfo("LiSi", 14);

        // zhangSan appears twice in the first list so the effect of distinct() is visible.
        List<PersonInfo> firstGroup = Lists.newArrayList(kaka, zhangSan, zhangSan);
        List<PersonInfo> secondGroup = Lists.newArrayList(hustzw, lisi);
        List<List<PersonInfo>> nested = Lists.newArrayList(firstGroup, secondGroup);

        // Group by age; duplicates are kept.
        Map<Integer, List<PersonInfo>> byAge = nested.stream()
                .flatMap(List::stream)
                .collect(Collectors.groupingBy(PersonInfo::getAge));

        // Same grouping, but duplicates are removed first.
        // flatMap replaces each inner list with a stream of its elements.
        Map<Integer, List<PersonInfo>> byAgeDistinct = nested.stream()
                .flatMap(List::stream)
                .distinct()
                .collect(Collectors.groupingBy(PersonInfo::getAge));

        assertThat(byAge).containsKey(14);
        assertThat(byAge.get(14).size()).isEqualTo(3);
        assertThat(byAgeDistinct.get(14).size()).isEqualTo(2);
    }

集合處理

當然還有一些用於對集合過濾、統計的。看個例子:

/**
 * An employee record used as the element type in the collection-processing examples.
 * <p>
 * NOTE(review): {@code @Data}, {@code @Builder} and {@code @AllArgsConstructor} look like
 * Lombok annotations (their imports are not visible here) — confirm. The examples rely on
 * a generated {@code builder()} and on getters such as {@code getAge()}.
 */
@Data
@Builder
@AllArgsConstructor
public class Work {

    // Employee name.
    private String name;

    // Department the employee belongs to, e.g. "IT" or "HR" in the examples.
    private String department;

    // Employee age; boxed Integer rather than primitive int.
    private Integer age;
}

下邊對該類型的集合進行處理:

/**
 * Examples of matching, filtering, aggregating and grouping a list of {@link Work}
 * with the Java 8 Stream API.
 */
public class MyTest {

    private List<Work> personList;

    /** Populates {@code personList} with five employees across three departments. */
    @Before
    public void setup() {

        personList = Lists.newArrayList();
        personList.add(Work.builder().name("zhangsan").department("IT").age(20).build());
        personList.add(Work.builder().name("lisi").department("IT").age(22).build());
        personList.add(Work.builder().name("wangwu").department("HR").age(28).build());
        personList.add(Work.builder().name("zhaoliu").department("Purchasing").age(23).build());
        personList.add(Work.builder().name("liuqi").department("IT").age(22).build());
    }

    /** Demonstrates allMatch / anyMatch / noneMatch and findFirst / findAny. */
    @Test
    public void should_can_match() {
        // Is everybody between 20 and 30 (inclusive)?
        boolean allYoung = personList.stream().allMatch(work -> (20 <= work.getAge() && work.getAge() <= 30));

        // Is there anybody aged exactly 22?
        boolean hasTwentyTwo = personList.stream().anyMatch(work -> 22 == work.getAge());

        // Nobody is aged exactly 25.
        boolean noneTwentyFive = personList.stream().noneMatch(work -> 25 == work.getAge());

        // First element (in encounter order) aged 22 or above.
        Optional<Work> firstAtLeastTwentyTwo = personList.stream().filter(work -> 22 <= work.getAge()).findFirst();

        // Any element aged 22 or above; not guaranteed to be the first one.
        Optional<Work> anyAtLeastTwentyTwo = personList.stream().filter(work -> 22 <= work.getAge()).findAny();
    }

    /** Demonstrates the common reducing, joining, grouping and partitioning collectors. */
    @Test
    public void should_can_filter_it_work() {
        // Distinct ages.
        List<Integer> ageList = personList.stream().map(Work::getAge).distinct().collect(Collectors.toList());

        // Oldest employee. Integer.compare is used instead of subtracting the ages,
        // because subtraction-based comparators can overflow.
        Optional<Work> maxAgeWork = personList.stream().collect(Collectors.maxBy((s1, s2) -> Integer.compare(s1.getAge(), s2.getAge())));
        // Or, equivalently:
        Optional<Work> maxAgeWork2 = personList.stream().collect(Collectors.maxBy(Comparator.comparing(Work::getAge)));

        // Youngest employee.
        Optional<Work> minAgeWork = personList.stream().collect(Collectors.minBy(Comparator.comparing(Work::getAge)));

        // Sum of ages within the IT department.
        Integer sumAgeForIT = personList.stream().filter(work -> "IT".equalsIgnoreCase(work.getDepartment())).collect(Collectors.summingInt(Work::getAge));

        // Average age.
        Double avgAge = personList.stream().collect(Collectors.averagingInt(Work::getAge));

        // Names joined with commas.
        String nameList = personList.stream().map(Work::getName).collect(Collectors.joining(","));

        // Group by department.
        Map<String, List<Work>> deparmentWorkMap = personList.stream().collect(Collectors.groupingBy(Work::getDepartment));

        // Group by department and count the members of each group.
        Map<String, Long> deparmentWorkCountMap = personList.stream().collect(Collectors.groupingBy(Work::getDepartment, Collectors.counting()));

        // Two-level grouping: by department, then by age.
        Map<String, Map<Integer, List<Work>>> deparmentAgeMap = personList.stream().collect(Collectors.groupingBy(Work::getDepartment, Collectors.groupingBy(Work::getAge)));

        // Split in two: IT department versus everybody else.
        Map<Boolean, List<Work>> partition = personList.stream().collect(Collectors.partitioningBy(work -> "IT".equals(work.getDepartment())));

    }

    /** Prints the summary statistics of the integers 1 to 10. */
    @Test
    public void should_can_statistic_int() {
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
        IntSummaryStatistics stats = numbers.stream().mapToInt(x -> x).summaryStatistics();

        System.out.println(stats);
    }


    /**
     * A slightly more involved case: builds a name-to-employee map, keeping the older
     * employee whenever two entries share a name, and prints every entry.
     */
    @Test
    public void should_can_sort_by_asc() {
        Map<String, Work> maxAgeWork = personList.stream().collect(Collectors.toMap(Work::getName, Function.identity(), BinaryOperator.maxBy(Comparator.comparing(Work::getAge))));

        maxAgeWork.forEach((name, work) -> {
            System.out.println("name is [" + name + "], age is [" + work.getAge() + "]");
        });
    }

    // TODO summaryStatistics gives us count, max, min, sum and average in one pass.
    // Its return type is the (Int/Long/Double)SummaryStatistics matching the stream type.

    /** Prints the age statistics of every department. */
    @Test
    public void should_can_get_group_by_age() {
        Map<String, IntSummaryStatistics> result = personList.stream().collect(Collectors.groupingBy(Work::getDepartment, Collectors.summarizingInt(Work::getAge)));

        System.out.println("Department | Statistics");
        for (Map.Entry<String, IntSummaryStatistics> entry : result.entrySet()) {
            System.out.printf("%10s : %70s\n", entry.getKey(), entry.getValue().toString());
        }
    }
}

返回值Int/Long/Double對應的統計類分別是IntSummaryStatistics、LongSummaryStatistics、DoubleSummaryStatistics。

值得說的是:summaryStatistics 方法(以及 Collectors.summarizingInt 等收集器)幫我們獲取:個數,最大值,最小值,和,平均值信息。即簡單的數據類型的統計,就像這樣:

Department | Statistics
Purchasing : IntSummaryStatistics{count=1, sum=23, min=23, average=23.000000, max=23}
        HR : IntSummaryStatistics{count=1, sum=28, min=28, average=28.000000, max=28}
        IT : IntSummaryStatistics{count=3, sum=64, min=20, average=21.333333, max=22}

apache的SummaryStatistics

如果想要一些複雜的統計,可以考慮apache的commons-math3的SummaryStatistics。

compile "org.apache.commons:commons-math3:3.5"

SummaryStatistics 可計算數組中最小值,最大值,平均值,幾何平均,和,平方和,標準差,方差

DescriptiveStatistics 計算數組中最小值,最大值,平均值,幾何平均,和,平方和,標準差,方差,百分位數,偏態,峯度,中位數。

Frequency 統計列表中的某些數據出現的頻次

    /**
     * Feeds every age in {@code personList} into commons-math3
     * {@code SummaryStatistics}, {@code Frequency} and {@code DescriptiveStatistics}
     * and prints the results.
     */
    @Test
    public void should_calculate_summary() {

        SummaryStatistics statistics = new SummaryStatistics();
        personList.forEach(p -> statistics.addValue(p.getAge()));

        System.out.println(statistics);
        System.out.printf("%30s: %30s\n", "統計項", "數據");
        System.out.printf("%30s: %30s\n", "數據個數", statistics.getN());
        System.out.printf("%30s: %30s\n", "最小值", statistics.getMin());
        System.out.printf("%30s: %30s\n", "最大值", statistics.getMax());
        System.out.printf("%30s: %30s\n", "平均值", statistics.getMean());
        System.out.printf("%30s: %30s\n", "幾何平均值", statistics.getGeometricMean());
        System.out.printf("%30s: %30s\n", "方差", statistics.getVariance());
        System.out.printf("%30s: %30s\n", "總體方差", statistics.getPopulationVariance());
        // The second moment is printed once; the original repeated this line by mistake.
        System.out.printf("%30s: %30s\n", "二階矩", statistics.getSecondMoment());
        System.out.printf("%30s: %30s\n", "平方和", statistics.getSumsq());
        System.out.printf("%30s: %30s\n", "標準差", statistics.getStandardDeviation());
        System.out.printf("%30s: %30s\n", "對數和", statistics.getSumOfLogs());

        Frequency frequency = new Frequency();
        personList.forEach(p -> frequency.addValue(p.getAge()));
        System.out.println(frequency);

        DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics();
        personList.forEach(p -> descriptiveStatistics.addValue(p.getAge()));
        System.out.println(descriptiveStatistics);

        // Other commons-math3 classes worth trying:
        // SimpleRegression: least-squares regression for a linear model
        // OLSMultipleLinearRegression and GLSMultipleLinearRegression: least-squares fitting of linear models
        // NaturalRanking: rank transformation
        // Covariance: covariance computation
    }

還有
SimpleRegression:線性模型最小二乘迴歸
OLSMultipleLinearRegression、GLSMultipleLinearRegression:提供最小二乘迴歸擬合線性模型。
NaturalRanking:排名轉換。
Covariance:協方差相關
有興趣的可以自己嘗試下

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章