假設我們只使用Calcite做查詢,因爲以上的數據基本上都是通過其他方式寫入的,而我們需要的是通過SQL查詢。Calcite實現了SQL語句的解析、物理執行計劃的生成以及查詢計劃的優化,用戶需要向Calcite提供數據庫的元數據(有哪些database(schema),每一個數據庫下有哪些table,每一個表有哪些字段,每一個字段的類型是什麼)和數據(每一個表中的每一行數據是什麼)。除此之外,用戶也可以重載它提供的執行計劃。這裏只提及了Calcite的一些基本功能,高階功能諸如Streaming(流式查詢)、Lattices(物化視圖)等不再展開。目前使用Calcite的方式是作爲一個本地的框架工具,而非作爲一個服務存在。
Apache Calcite具有以下幾個技術特性:
支持標準SQL語言;獨立於編程語言和數據源,可以支持不同的前端和後端;
支持關係代數、可定製的邏輯規劃規則和基於成本模型優化的查詢引擎;
支持物化視圖(materialized view)的管理(創建、丟棄、持久化和自動識別);
基於物化視圖的Lattice和Tile機制,以應用於OLAP分析;
支持對流數據的查詢。
這裏有一篇介紹Calcite的文章可以參考:http://www.infoq.com/cn/articles/new-big-data-hadoop-query-engine-apache-calcite
// In-memory catalog handed to Calcite: database (schema) name -> Database definition.
// Populated elsewhere; Calcite reads it through the custom Schema implementation below.
public static final Map<String, Database> MAP = new HashMap<String, Database>();
// A database (schema): nothing more than its list of tables.
public static class Database {
public List<Table> tables = new LinkedList<Table>();
}
// A table: name, ordered column definitions, and row data.
// Each inner List<String> in `data` is one row; values are stored as strings and
// presumably converted according to the column type at scan time — TODO confirm.
public static class Table{
public String tableName;
public List<Column> columns = new LinkedList<Column>();
public List<List<String>> data = new LinkedList<List<String>>();
}
// A column: its name plus a SQL type name (e.g. "varchar", "integer", "date")
// that is later mapped to a Calcite SqlTypeName (see SQLTYPE_MAPPING usage).
public static class Column{
public String name;
public String type;
}
// Define the "Class" table: (name varchar, id integer, teacher varchar).
cl.tableName = "Class";

Column nameCol = new Column();
nameCol.name = "name";
nameCol.type = "varchar";
cl.columns.add(nameCol);

Column idCol = new Column();
idCol.name = "id";
idCol.type = "integer";
cl.columns.add(idCol);

Column teacherCol = new Column();
teacherCol.name = "teacher";
teacherCol.type = "varchar";
cl.columns.add(teacherCol);
// Define the "Student" table:
// (name varchar, id varchar, classId integer, birthday date, home varchar).
// NOTE(review): Student.id is declared varchar while Class.id is integer —
// confirm this asymmetry is intentional.
student.tableName = "Student";

Column nameCol = new Column();
nameCol.name = "name";
nameCol.type = "varchar";
student.columns.add(nameCol);

Column idCol = new Column();
idCol.name = "id";
idCol.type = "varchar";
student.columns.add(idCol);

Column classIdCol = new Column();
classIdCol.name = "classId";
classIdCol.type = "integer";
student.columns.add(classIdCol);

Column birthdayCol = new Column();
birthdayCol.name = "birthday";
birthdayCol.type = "date";
student.columns.add(birthdayCol);

Column homeCol = new Column();
homeCol.name = "home";
homeCol.type = "varchar";
student.columns.add(homeCol);
{
version: '1.0',
defaultSchema: 'school',
schemas: [
{
name: 'school',
type: 'custom',
factory: 'org.apache.kylin.calcite.test.MemorySchemaFactory',
operand: {
param1: 'hello',
param2: 'world'
}
}
]
}
/**
 * SchemaFactory referenced from the model JSON ("factory" attribute).
 * Calcite instantiates it reflectively and calls {@link #create} once per
 * schema, passing the "operand" map from the model file.
 */
public class MemorySchemaFactory implements SchemaFactory {

    /**
     * Builds the schema named in the model file.
     *
     * @param parentSchema the root schema Calcite attaches this schema to
     * @param name         the schema (database) name from the model file
     * @param operand      the free-form "operand" map from the model file
     * @return a {@link MemorySchema} backed by the in-memory catalog
     */
    @Override
    public Schema create(SchemaPlus parentSchema, String name, Map<String, Object> operand) {
        // Demonstrate that the operand parameters from the JSON model arrive here.
        System.out.println("param1 : " + operand.get("param1"));
        System.out.println("param2 : " + operand.get("param2"));
        System.out.println("Get database " + name);
        return new MemorySchema(name);
    }
}
/**
 * Exposes this schema's tables to Calcite: looks up the in-memory database
 * by name and wraps each of its table definitions in a {@link MemoryTable}.
 * Returns an empty map (never null) when the database is unknown.
 */
@Override
public Map<String, Table> getTableMap() {
    Map<String, Table> result = new HashMap<String, Table>();
    Database database = MemoryData.MAP.get(this.dbName);
    if (database != null) {
        for (MemoryData.Table source : database.tables) {
            result.put(source.tableName, new MemoryTable(source));
        }
    }
    return result;
}
/**
 * Describes this table's row type (column names and SQL types) to Calcite.
 * Built lazily on first call from the source table's column definitions and
 * cached in {@code dataType}; the string type names are translated through
 * {@code MemoryData.SQLTYPE_MAPPING}.
 * NOTE(review): the lazy init is not synchronized — assumes single-threaded
 * planning; confirm if that ever changes.
 */
@Override
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
    if (this.dataType == null) {
        RelDataTypeFactory.FieldInfoBuilder builder = typeFactory.builder();
        for (MemoryData.Column column : this.sourceTable.columns) {
            RelDataType sqlType =
                typeFactory.createSqlType(MemoryData.SQLTYPE_MAPPING.get(column.type));
            // Character types need an explicit charset/collation to be usable in SQL.
            builder.add(column.name, SqlTypeUtil.addCharsetAndCollation(sqlType, typeFactory));
        }
        this.dataType = typeFactory.createStructType(builder);
    }
    return this.dataType;
}
/**
 * Full-table scan entry point used by Calcite's enumerable convention.
 * Projects all columns (identity field list) and streams the in-memory rows
 * through a {@code MemoryEnumerator}.
 */
@Override
public Enumerable<Object[]> scan(DataContext root) {
    final int[] projectedFields = identityList(this.dataType.getFieldCount());
    return new AbstractEnumerable<Object[]>() {
        @Override
        public Enumerator<Object[]> enumerator() {
            return new MemoryEnumerator<Object[]>(projectedFields, sourceTable.data);
        }
    };
}
/**
 * Demo entry point: connects to Calcite via the JSON model file, lists the
 * visible tables through JDBC metadata, then runs a join/group-by query
 * against the in-memory schema.
 *
 * Fixes: the original leaked the Connection/Statement/ResultSet on any
 * exception path; all JDBC resources are now closed via try-with-resources
 * (Calcite itself requires Java 7+, so the construct is available). Also
 * fails fast if the driver class cannot be loaded instead of continuing
 * into a guaranteed connection failure.
 */
public static void main(String[] args) {
    try {
        // Explicit registration; harmless under JDBC 4 auto-loading.
        Class.forName("org.apache.calcite.jdbc.Driver");
    } catch (ClassNotFoundException e1) {
        e1.printStackTrace();
        return; // no driver -> getConnection cannot succeed
    }
    Properties info = new Properties();
    // NOTE(review): hard-coded Windows path to the model file — adjust locally.
    try (Connection connection = DriverManager.getConnection(
            "jdbc:calcite:model=E:\\file\\to\\model\\file\\School.json", info)) {
        // 1) Enumerate catalogs/schemas/tables via standard JDBC metadata.
        try (ResultSet result = connection.getMetaData().getTables(null, null, null, null)) {
            while (result.next()) {
                System.out.println("Catalog : " + result.getString(1) + ",Database : " + result.getString(2) + ",Table : " + result.getString(3));
            }
        }
        // 2) Run a query; identifiers are quoted because Calcite is
        //    case-sensitive for quoted identifiers by default.
        try (Statement st = connection.createStatement();
             ResultSet result = st.executeQuery("select \"home\", 1 , count(1) from \"Student\" as S INNER JOIN \"Class\" as C on S.\"classId\" = C.\"id\" group by \"home\"")) {
            while (result.next()) {
                System.out.println(result.getString(1) + "\t" + result.getString(2) + "\t" + result.getString(3));
            }
        }
    } catch (SQLException e) {
        e.printStackTrace();
    }
}
param1 : hello
param2 : world
Get database school
Catalog : null,Database : metadata,Table : COLUMNS
Catalog : null,Database : metadata,Table : TABLES
Catalog : null,Database : school,Table : Class
Catalog : null,Database : school,Table : Student
sichuan 1 1
zhejiang 1 1
henan 1 1
jiangsu 1 1
hebei 1 1
beijing 1 1
anhui 1 2