【算法】B樹的Java源碼實現及Princeton版本源碼理解

2019.02.24

前言

想在業餘時間做個小工具,設計是不使用數據庫,而是用文件系統來存儲數據。爲了減少文件打開次數,提高索引效率,用B樹構建內存索引。B樹的原理就不介紹了,提供如下鏈接供擴展閱讀:

  1. Princeton算法課程slide:https://www.cs.princeton.edu/~rs/AlgsDS07/09BalancedTrees.pdf
  2. Princeton版本的B樹Java實現(可直接使用,本文會添加配圖說明和添加代碼中文註釋):
    https://algs4.cs.princeton.edu/code/edu/princeton/cs/algs4/BTree.java.html
  3. B樹可視化:
    https://www.cs.usfca.edu/~galles/visualization/BTree.html

網上很多博客都直接複製了Princeton版本的代碼,但Princeton版源碼生成的B樹,葉子節點上都有哨兵,跟手繪出來的B樹不太相同,所以撰寫本文予以說明。同時,爲了構建出與可視化出來更像的B樹,我對Princeton版本源碼略加修改,實現了自己的版本,在本文會有較多附圖說明三者之間的區別。

B樹例子

下面Java源碼都會用如下插入順序構建BTree:
BTree插入順序

Princeton版本源碼理解

哨兵

Princeton版本的BTree實現,採用了哨兵,使得任意一個結點(包含key1, …, keyi, …, keym),keyi對應子結點內的所有key值都>=keyi,並且都小於key(i+1)。採用哨兵的數據結構有效地簡化了代碼。

對於上一節的B樹例子,採用Princeton版本源碼生成的B樹如下圖所示:
Princeton版本的BTree

源碼

/**
 * An in-memory B-tree symbol table mapping comparable keys to values.
 *
 * <p>Values are stored only in external (leaf) nodes; internal nodes hold
 * routing keys and child links. In an internal node the key of the first
 * entry is never compared during a descent, so that entry always receives
 * every key smaller than the second entry's key.
 *
 * @param <Key>   the key type
 * @param <Value> the value type
 */
public class PrincetonBTree<Key extends Comparable<Key>, Value>  {
    // max children per B-tree node = M-1
    // (must be even and greater than 2)
    private static final int M = 4;

    private Node root;       // root of the B-tree
    private int height;      // height of the B-tree
    private int n;           // number of key-value pairs in the B-tree

    // helper B-tree node data type
    private static final class Node {
        private int m;                             // number of children
        private Entry[] children = new Entry[M];   // the array of children

        // create a node with k children
        private Node(int k) {
            m = k;
        }
    }

    // internal nodes: only use key and next
    // external nodes: only use key and value
    // Consequently a lookup can only obtain a value from an external node.
    private static class Entry {
        private Comparable key;
        private Object val;    // not final: put() overwrites it when the key already exists
        private Node next;     // helper field to iterate over array entries
        public Entry(Comparable key, Object val, Node next) {
            this.key  = key;
            this.val  = val;
            this.next = next;
        }
    }

    /**
     * Initializes an empty B-tree.
     */
    public PrincetonBTree() {
        root = new Node(0);
    }

    /**
     * Returns true if this symbol table is empty.
     * @return {@code true} if this symbol table is empty; {@code false} otherwise
     */
    public boolean isEmpty() {
        return size() == 0;
    }

    /**
     * Returns the number of key-value pairs in this symbol table.
     * @return the number of key-value pairs in this symbol table
     */
    public int size() {
        return n;
    }

    /**
     * Returns the height of this B-tree (for debugging).
     *
     * @return the height of this B-tree
     */
    public int height() {
        return height;
    }


    /**
     * Returns the value associated with the given key.
     *
     * @param  key the key
     * @return the value associated with the given key if the key is in the symbol table
     *         and {@code null} if the key is not in the symbol table
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    public Value get(Key key) {
        if (key == null) throw new IllegalArgumentException("argument to get() is null");
        return search(root, key, height);
    }

    // Recursive lookup: ht is the remaining distance to the leaf level.
    @SuppressWarnings("unchecked") // values are only ever stored through put(Key, Value)
    private Value search(Node x, Key key, int ht) {
        Entry[] children = x.children;

        // external node
        if (ht == 0) {
            for (int j = 0; j < x.m; j++) {
                if (eq(key, children[j].key)) return (Value) children[j].val;
            }
        }

        // internal node
        else {
            for (int j = 0; j < x.m; j++) {
                // descend into the last child whose successor key is greater than key
                if (j+1 == x.m || less(key, children[j+1].key))
                    return search(children[j].next, key, ht-1);
            }
        }
        return null;
    }


    /**
     * Inserts the key-value pair into the symbol table, overwriting the old value
     * with the new value if the key is already in the symbol table.
     * Storing a {@code null} value makes {@link #get} return {@code null} for the
     * key; the entry itself stays in the tree and is still counted by {@link #size}.
     *
     * @param  key the key
     * @param  val the value
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    public void put(Key key, Value val) {
        if (key == null) throw new IllegalArgumentException("argument key to put() is null");
        Node node = insert(root, key, val, height);
        if (node == null) return;

        // need to split root
        Node newRoot = new Node(2);
        newRoot.children[0] = new Entry(root.children[0].key, null, root);
        newRoot.children[1] = new Entry(node.children[0].key, null, node);
        root = newRoot;
        height++;
    }

    // Inserts into the subtree rooted at node. Returns the new right sibling when
    // node had to be split, or null when nothing has to be propagated upwards.
    private Node insert(Node node, Key key, Value val, int height) {
        int index;
        Entry entry = new Entry(key, val, null);

        // external node
        if (height == 0) {
            for (index = 0; index < node.m; index++) {
                Entry existing = node.children[index];
                if (eq(key, existing.key)) {
                    // key already present: replace the value, leave the structure and n alone
                    existing.val = val;
                    return null;
                }
                if (less(key, existing.key)) break;
            }
            n++;   // a genuinely new key is about to be added to this leaf
        }

        // internal node
        else {
            for (index = 0; index < node.m; index++) {
                if ((index+1 == node.m) || less(key, node.children[index+1].key)) {
                    Node splitNode = insert(node.children[index++].next, key, val, height-1);
                    if (splitNode == null) return null;
                    // the child split: link its new sibling right after the child
                    entry.key = splitNode.children[0].key;
                    entry.next = splitNode;
                    break;
                }
            }
        }

        // shift entries right to open a slot at index
        for (int i = node.m; i > index; i--)
            node.children[i] = node.children[i-1];
        node.children[index] = entry;
        node.m++;
        if (node.m < M) return null;
        else            return split(node);
    }

    // split node in half
    private Node split(Node h) {
        Node t = new Node(M/2);
        h.m = M/2;
        for (int j = 0; j < M/2; j++)
            t.children[j] = h.children[M/2+j];
        return t;
    }

    /**
     * Returns a string representation of this B-tree (for debugging).
     *
     * @return a string representation of this B-tree.
     */
    @Override
    public String toString() {
        return toString(root, height, "") + "\n";
    }

    private String toString(Node h, int ht, String indent) {
        StringBuilder s = new StringBuilder();
        Entry[] children = h.children;

        if (ht == 0) {
            for (int j = 0; j < h.m; j++) {
                s.append(indent + children[j].key + " " + children[j].val + "\n");
            }
        }
        else {
            for (int j = 0; j < h.m; j++) {
                if (j > 0) s.append(indent + "(" + children[j].key + ")\n");
                s.append(toString(children[j].next, ht-1, indent + "     "));
            }
        }
        return s.toString();
    }


    // comparison functions - make Comparable instead of Key to avoid casts
    @SuppressWarnings({"unchecked", "rawtypes"})
    private boolean less(Comparable k1, Comparable k2) {
        return k1.compareTo(k2) < 0;
    }

    @SuppressWarnings({"unchecked", "rawtypes"})
    private boolean eq(Comparable k1, Comparable k2) {
        return k1.compareTo(k2) == 0;
    }


    /**
     * Unit tests the {@code BTree} data type.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        PrincetonBTree<Double, String> bTree = new PrincetonBTree<Double, String>();

        bTree.put(1D, "test1");
        bTree.put(4D, "test4");
        bTree.put(7D, "test7");
        bTree.put(0D, "test0");
        bTree.put(2D, "test2");
        bTree.put(5D, "test5");
        bTree.put(8D, "test8");
        bTree.put(6D, "test6");
        bTree.put(9D, "test9");
        bTree.put(3D, "test3");
        bTree.put(10D, "test10");

        System.out.println(bTree.get(-1D));
        System.out.println(bTree.get(0D));
        System.out.println(bTree.get(1D));
        System.out.println(bTree.get(2D));
        System.out.println(bTree.get(2.5D));
        System.out.println(bTree.get(3D));
        System.out.println(bTree.get(4D));
        System.out.println(bTree.get(5D));
        System.out.println(bTree.get(6D));
        System.out.println(bTree.get(7D));
        System.out.println(bTree.get(8D));
        System.out.println(bTree.get(9D));
        System.out.println(bTree.get(10D));
        System.out.println(bTree.get(11D));
    }

}

基於Princeton的修改版本

相同的哨兵,不同的結點

Princeton版本的BTree,顯然,要查找key對應的value值,都必須索引到葉子結點才能得到,原因是它區分了內部結點和外部結點。而在我修改的版本里,同樣也採用了哨兵,但結點不區分內部與外部結點,除了哨兵沒有value之外,所有非葉子結點既有value又有子結點。比如同樣是查找4的value值,Princeton的B樹要遞歸3次,而我修改的版本在根結點即可獲得。

下圖是對於同一個B樹例子,用我修改的版本所生成的B樹。對比三張圖,可以明顯地發現,Princeton版本的代碼生成的B樹跟我們手繪出來的是不一樣的;而我修改的版本所生成的B樹,和“B樹例子”裏是完全相同的,只是每個結點多了個哨兵。
基於Princeton的修改版本

源碼

/**
 * An in-memory B-tree in which every node keeps an extra entry in slot 0 in
 * front of its real entries, and in which real entries carry both a value and
 * a child link, so a lookup can return as soon as it meets the key at any level.
 *
 * <p>Slot 0 of a node is never matched by {@link #search}; only its
 * {@code next} link is followed, for keys smaller than the entry in slot 1.
 *
 * @param <Key>   the key type
 * @param <Value> the value type
 */
public class BTreeWithSentinel<Key extends Comparable, Value> {
    private static final int M = 4;
    // capacity of a node's entry array: M slots plus the slot-0 entry
    private static final int M_WITH_SENTINEL = M + 1;

    private Node root;
    private int height;

    /**
     * Creates an empty tree: a single leaf that contains only the slot-0 entry.
     */
    BTreeWithSentinel() {
        this.root = new Node(1);
        this.root.keys[0] = new Entry(null, null, null);
        this.height = 0;
    }

    /** A tree node: a fixed-capacity entry array of which the first {@code size} slots are used. */
    public static final class Node {
        Entry[] keys = new Entry[M_WITH_SENTINEL];
        int size = 0;

        Node(int size) {
            this.size = size;
        }
    }

    /** A key, its value, and the child holding the keys routed to this entry. */
    public static final class Entry {
        Comparable key;
        Object value;
        Node next;

        Entry(Comparable key, Object value, Node next) {
            this.key = key;
            this.value = value;
            this.next = next;
        }
    }

    /**
     * Associates {@code value} with {@code key}, replacing the previous value
     * when the key is already present.
     *
     * @param key   the key
     * @param value the value
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    public void put(Key key, Value value) {
        if (key == null) {
            // a null key would otherwise be stored silently in an empty leaf
            // and make every later comparison fail
            throw new IllegalArgumentException("argument key to put() is null");
        }
        Node newNode = insert(this.root, key, value, this.height);
        if (newNode == null) {
            return;
        }

        // the root was split: grow the tree by one level
        Node newRoot = new Node(2);
        newRoot.keys[0] = new Entry(null, null, root);
        newRoot.keys[1] = new Entry(newNode.keys[0].key, newNode.keys[0].value, newNode);
        this.root = newRoot;
        this.height++;
    }

    /**
     * Returns the value associated with {@code key}.
     *
     * @param key the key
     * @return the stored value, or {@code null} when the key is absent
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    public Value get(Key key) {
        if (key == null) {
            throw new IllegalArgumentException("argument to get() is null");
        }
        return search(this.root, key);
    }

    /**
     * Looks {@code key} up in the subtree rooted at {@code root}.
     *
     * @param root the subtree root; {@code null} yields {@code null}
     * @param key  the key to find
     * @return the stored value, or {@code null} when the key is absent
     */
    @SuppressWarnings("unchecked") // values are only ever stored through put(Key, Value)
    public Value search(Node root, Key key) {
        if (root == null) {
            return null;
        }
        for (int i = 0; i < root.size; i++) {
            if ((i + 1) == root.size || less(key, root.keys[i + 1].key)) {
                return search(root.keys[i].next, key);
            } else if (equal(key, root.keys[i + 1].key)) {
                return (Value) root.keys[i + 1].value;
            }
        }
        return null;
    }

    /**
     * Inserts the pair into the subtree rooted at {@code root}, which is
     * {@code height} levels above the leaves. An existing key has its value
     * replaced in whichever node holds it.
     *
     * @return the new right sibling if {@code root} had to be split, otherwise {@code null}
     */
    public Node insert(Node root, Key key, Value value, int height) {
        Entry entry = new Entry(key, value, null);
        int index;

        if (height == 0) {
            // leaf: slot 0 is skipped, real entries start at slot 1
            for (index = 1; index < root.size; index++) {
                Entry existing = root.keys[index];
                if (equal(key, existing.key)) {
                    existing.value = value;
                    return null;
                }
                if (less(key, existing.key)) {
                    break;
                }
            }
        } else {
            for (index = 0; index < root.size; index++) {
                if ((index + 1) == root.size || less(key, root.keys[index + 1].key)) {
                    Node newNode = insert(root.keys[index].next, key, value, height - 1);
                    if (newNode == null) {
                        return null;
                    }
                    // the child split: its first entry is copied up and linked to the new sibling
                    entry = new Entry(newNode.keys[0].key, newNode.keys[0].value, newNode);
                    index++;
                    break;
                }
                if (equal(key, root.keys[index + 1].key)) {
                    // the key lives in this node; search() returns this entry's value
                    root.keys[index + 1].value = value;
                    return null;
                }
            }
        }

        // shift entries right to open a slot at index
        for (int i = root.size; i > index; i--) {
            root.keys[i] = root.keys[i - 1];
        }
        root.keys[index] = entry;
        root.size++;
        if (root.size < M_WITH_SENTINEL) {
            return null;
        }
        return splitNode(root);
    }

    /**
     * Splits {@code node}: the lower half of its entries (rounded down) stays,
     * the rest moves to a new node whose slot 0 is the first moved entry.
     *
     * @return the newly created right sibling
     */
    public Node splitNode(Node node) {
        int total = node.size;
        Node newNode = new Node((total + 1) / 2);   // ceil(total / 2)
        node.size = total / 2;                      // floor(total / 2)
        for (int i = 0; i < newNode.size; i++) {
            newNode.keys[i] = node.keys[node.size + i];
        }
        return newNode;
    }

    @SuppressWarnings({"unchecked", "rawtypes"})
    private boolean equal(Comparable foo, Comparable bar) {
        return foo.compareTo(bar) == 0;
    }

    @SuppressWarnings({"unchecked", "rawtypes"})
    private boolean less(Comparable foo, Comparable bar) {
        return foo.compareTo(bar) < 0;
    }

    public static void main(String[] args) {
        BTreeWithSentinel<Double, String> bTree = new BTreeWithSentinel<Double, String>();

        bTree.put(1D, "test1");
        bTree.put(4D, "test4");
        bTree.put(7D, "test7");
        bTree.put(0D, "test0");
        bTree.put(2D, "test2");
        bTree.put(5D, "test5");
        bTree.put(8D, "test8");
        bTree.put(6D, "test6");
        bTree.put(9D, "test9");
        bTree.put(3D, "test3");
        bTree.put(10D, "test10");

        System.out.println(bTree.get(-1D));
        System.out.println(bTree.get(0D));
        System.out.println(bTree.get(1D));
        System.out.println(bTree.get(2D));
        System.out.println(bTree.get(2.5D));
        System.out.println(bTree.get(3D));
        System.out.println(bTree.get(4D));
        System.out.println(bTree.get(5D));
        System.out.println(bTree.get(6D));
        System.out.println(bTree.get(7D));
        System.out.println(bTree.get(8D));
        System.out.println(bTree.get(9D));
        System.out.println(bTree.get(10D));
        System.out.println(bTree.get(11D));
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章