facebook presto源碼解讀(part1)
——————原創文章,如有轉載請說明出處
邊學邊賣,把代碼閱讀的結果彙報給各位讀者,如有不對請指正。
part1主要是介紹整體的代碼結構和大致的運行流程。
代碼是在2016年8月從官網下載的。整體結構如下:
presto 客戶端代碼在presto-cli模塊
但presto服務端代碼不在presto-server模塊中:打開presto-server模塊可以發現,它只是利用maven插件進行編譯打包,該模塊裏只有打包規則文件。
presto服務端代碼在presto-main模塊。PrestoServer類啓動服務器。
其他模塊簡述:
presto-base-jdbc 關係型數據庫連接器的公共模塊
presto-mysql mysql連接器用到了presto-base-jdbc
presto-jdbc jdbc客戶端/另一種是cli
presto-hive-* hive連接器相關代碼
presto-orc hive連接器讀取hdfs的orc文件,並做了一些優化
presto-ml machine learning相關功能,尚未實現,仍在計劃中
presto-kafka/cassandra/jmx 各種連接器
下面介紹客戶端連接服務器進行sql語句的運行。主要運行ddl語句和Select語句,還有insert語句。
client入口代碼在Presto類
package com.facebook.presto.cli;
import static io.airlift.airline.SingleCommand.singleCommand;
/**
 * Command-line entry point for the Presto CLI: parses the arguments into a
 * {@code Console} and runs it, unless --help/--version already handled the
 * invocation.
 */
public final class Presto
{
    // utility class: no instances
    private Presto() {}

    public static void main(String[] args)
            throws Exception
    {
        Console console = singleCommand(Console.class).parse(args);

        boolean handledByOption = console.helpOption.showHelpIfRequested()
                || console.versionOption.showVersionIfRequested();
        if (handledByOption) {
            return;
        }

        console.run();
    }
}
進入Console類的run方法,
/**
 * Entry point for one CLI session: builds the client session from the parsed
 * options, resolves the query text (inline --execute or --file), then either
 * runs it in batch mode or starts the interactive console.
 */
public void run()
{
    ClientSession session = clientOptions.toClientSession();
    KerberosConfig kerberosConfig = clientOptions.toKerberosConfig();

    // --execute supplies an inline query; --file reads the query from disk
    boolean hasQuery = !Strings.isNullOrEmpty(clientOptions.execute);
    boolean isFromFile = !Strings.isNullOrEmpty(clientOptions.file);

    // NOTE(review): with '||' ANSI support is installed unless BOTH options
    // are set (which is rejected below anyway) — confirm '&&' was not intended
    if (!hasQuery || !isFromFile) {
        AnsiConsole.systemInstall();
    }

    initializeLogging(clientOptions.logLevelsFile);

    String query = clientOptions.execute;
    if (hasQuery) {
        // ensure the inline statement is terminated for the splitter
        query += ";";
    }

    if (isFromFile) {
        if (hasQuery) {
            // the two query sources are mutually exclusive
            throw new RuntimeException("both --execute and --file specified");
        }
        try {
            query = Files.toString(new File(clientOptions.file), UTF_8);
            hasQuery = true;
        }
        catch (IOException e) {
            throw new RuntimeException(format("Error reading from file %s: %s", clientOptions.file, e.getMessage()));
        }
    }

    // try-with-resources guarantees the HTTP query runner is closed
    try (QueryRunner queryRunner = QueryRunner.create(
            session,
            Optional.ofNullable(clientOptions.socksProxy),
            Optional.ofNullable(clientOptions.keystorePath),
            Optional.ofNullable(clientOptions.keystorePassword),
            Optional.ofNullable(clientOptions.krb5Principal),
            Optional.ofNullable(clientOptions.krb5RemoteServiceName),
            clientOptions.authenticationEnabled,
            kerberosConfig)) {
        if (hasQuery) {
            // batch mode: run the supplied statements and exit
            executeCommand(queryRunner, query, clientOptions.outputFormat);
        }
        else {
            // interactive REPL mode
            runConsole(queryRunner, session);
        }
    }
}
進入 executeCommand(queryRunner, query, clientOptions.outputFormat);方法:
/**
 * Splits the raw input into individual SQL statements and runs each one
 * non-interactively; warns on stderr about a trailing unterminated statement.
 */
private static void executeCommand(QueryRunner queryRunner, String query, OutputFormat outputFormat)
{
    StatementSplitter statements = new StatementSplitter(query);
    for (Statement statement : statements.getCompleteStatements()) {
        String sql = statement.statement();
        if (isEmptyStatement(sql)) {
            continue;
        }
        process(queryRunner, sql, outputFormat, false);
    }

    String partial = statements.getPartialStatement();
    if (!isEmptyStatement(partial)) {
        System.err.println("Non-terminated statement: " + partial);
    }
}
進入process方法,
/**
 * Runs a single SQL statement through the query runner, renders its output,
 * and folds any session-state changes reported by the server (SET/RESET
 * session properties, transaction id) back into the runner's session.
 */
private static void process(QueryRunner queryRunner, String sql, OutputFormat outputFormat, boolean interactive)
{
    // Query is closed by try-with-resources, releasing its HTTP resources
    try (Query query = queryRunner.startQuery(sql)) {
        query.renderOutput(System.out, outputFormat, interactive);

        ClientSession session = queryRunner.getSession();

        // update session properties if present
        if (!query.getSetSessionProperties().isEmpty() || !query.getResetSessionProperties().isEmpty()) {
            // copy-on-write: build a new map, apply SETs, then remove RESETs
            Map<String, String> sessionProperties = new HashMap<>(session.getProperties());
            sessionProperties.putAll(query.getSetSessionProperties());
            sessionProperties.keySet().removeAll(query.getResetSessionProperties());
            session = withProperties(session, sessionProperties);
        }

        // update transaction ID if necessary
        if (query.isClearTransactionId()) {
            session = stripTransactionId(session);
        }
        if (query.getStartedTransactionId() != null) {
            session = withTransactionId(session, query.getStartedTransactionId());
        }
        queryRunner.setSession(session);
    }
    catch (RuntimeException e) {
        // best-effort error reporting; full stack trace only in debug mode
        System.err.println("Error running command: " + e.getMessage());
        if (queryRunner.getSession().isDebug()) {
            e.printStackTrace();
        }
    }
}
進入Query query = queryRunner.startQuery(sql)方法發送rest請求到server端進行運行:
/**
 * Starts the given SQL on the server and wraps the low-level statement
 * client in a {@code Query} helper.
 */
public Query startQuery(String query)
{
    StatementClient client = startInternalQuery(query);
    return new Query(client);
}
/**
 * Creates a {@code StatementClient} for the given SQL, which issues the
 * initial request to the server using the current session.
 */
public StatementClient startInternalQuery(String query)
{
    ClientSession currentSession = session.get();
    return new StatementClient(httpClient, queryResultsCodec, currentSession, query);
}
/**
 * Creates the client and synchronously issues the initial POST to the
 * statement endpoint; throws if the server does not answer 200 with a body.
 */
public StatementClient(HttpClient httpClient, JsonCodec<QueryResults> queryResultsCodec, ClientSession session, String query)
{
    requireNonNull(httpClient, "httpClient is null");
    requireNonNull(queryResultsCodec, "queryResultsCodec is null");
    requireNonNull(session, "session is null");
    requireNonNull(query, "query is null");

    this.httpClient = httpClient;
    this.responseHandler = createFullJsonResponseHandler(queryResultsCodec);
    this.debug = session.isDebug();
    this.timeZoneId = session.getTimeZoneId();
    this.query = query;
    this.requestTimeoutNanos = session.getClientRequestTimeout().roundTo(NANOSECONDS);

    // NOTE(review): blocking network I/O happens inside the constructor —
    // the first request is sent before the object is fully constructed
    Request request = buildQueryRequest(session, query);
    JsonResponse<QueryResults> response = httpClient.execute(request, responseHandler);
    if (response.getStatusCode() != HttpStatus.OK.code() || !response.hasValue()) {
        throw requestFailedException("starting query", request, response);
    }
    processResponse(response);
}
進入buildQueryRequest方法可以發現目標rest地址:
/**
 * Builds the initial POST /v1/statement request, carrying the session state
 * (user, source, catalog, schema, time zone, locale, session properties,
 * transaction id) as Presto-specific HTTP headers and the SQL as the body.
 */
private static Request buildQueryRequest(ClientSession session, String query)
{
    Request.Builder request = preparePost()
            .setUri(uriBuilderFrom(session.getServer()).replacePath("/v1/statement").build())
            .setBodyGenerator(createStaticBodyGenerator(query, UTF_8));

    // optional identity/context headers are only sent when present
    String user = session.getUser();
    if (user != null) {
        request.setHeader(PrestoHeaders.PRESTO_USER, user);
    }
    String source = session.getSource();
    if (source != null) {
        request.setHeader(PrestoHeaders.PRESTO_SOURCE, source);
    }
    String catalog = session.getCatalog();
    if (catalog != null) {
        request.setHeader(PrestoHeaders.PRESTO_CATALOG, catalog);
    }
    String schema = session.getSchema();
    if (schema != null) {
        request.setHeader(PrestoHeaders.PRESTO_SCHEMA, schema);
    }

    request.setHeader(PrestoHeaders.PRESTO_TIME_ZONE, session.getTimeZoneId());
    request.setHeader(PrestoHeaders.PRESTO_LANGUAGE, session.getLocale().toLanguageTag());
    request.setHeader(USER_AGENT, USER_AGENT_VALUE);

    // one repeated header per session property, encoded as key=value
    for (Entry<String, String> entry : session.getProperties().entrySet()) {
        request.addHeader(PrestoHeaders.PRESTO_SESSION, entry.getKey() + "=" + entry.getValue());
    }

    String transactionId = session.getTransactionId();
    request.setHeader(PrestoHeaders.PRESTO_TRANSACTION_ID, transactionId == null ? "NONE" : transactionId);

    return request.build();
}
即目標REST路徑爲 /v1/statement。
發現StatementResource類爲目標服務類:
@Path("/v1/statement")
public class StatementResource
{
    private static final Logger log = Logger.get(StatementResource.class);

    // presumably the longest a single result poll blocks — TODO confirm usage
    private static final Duration MAX_WAIT_TIME = new Duration(1, SECONDS);
    // orders Durations naturally, with nulls last
    private static final Ordering<Comparable<Duration>> WAIT_ORDERING = Ordering.natural().nullsLast();
    // presumably the target size of one result batch — TODO confirm usage
    private static final long DESIRED_RESULT_BYTES = new DataSize(1, MEGABYTE).toBytes();

    private final QueryManager queryManager;
    private final AccessControl accessControl;
    private final SessionPropertyManager sessionPropertyManager;
    private final ExchangeClientSupplier exchangeClientSupplier;
    private final QueryIdGenerator queryIdGenerator;
    // all queries started via this resource, keyed by id; swept by queryPurger
    private final ConcurrentMap<QueryId, Query> queries = new ConcurrentHashMap<>();
    // single background thread that periodically purges finished queries
    private final ScheduledExecutorService queryPurger = newSingleThreadScheduledExecutor(threadsNamed("query-purger"));
....
    /**
     * POST /v1/statement: creates a session from the request headers,
     * registers a new {@code Query}, and returns the first response after
     * waiting at most 1ms, so the client gets a nextUri to poll quickly.
     */
    @POST
    @Produces(MediaType.APPLICATION_JSON)
    public Response createQuery(
            String statement,
            @Context HttpServletRequest servletRequest,
            @Context UriInfo uriInfo)
            throws InterruptedException
    {
        assertRequest(!isNullOrEmpty(statement), "SQL statement is empty");

        Session session = createSessionForRequest(servletRequest, accessControl, sessionPropertyManager, queryIdGenerator.createNextQueryId());

        // memory-delta callback is deliberately a no-op here
        ExchangeClient exchangeClient = exchangeClientSupplier.get(deltaMemoryInBytes -> { });
        Query query = new Query(session, statement, queryManager, exchangeClient);
        // track the query so the purger and follow-up polls can find it
        queries.put(query.getQueryId(), query);

        return getQueryResults(query, Optional.empty(), uriInfo, new Duration(1, MILLISECONDS));
    }
在createQuery方法中將query存入queries這個ConcurrentMap中,供後續輪詢和清理線程使用。
在StatementResource構造函數運行時會進行調度:
    /**
     * Wires dependencies and starts the background purger that runs every
     * 200ms to dispose of failed queries and drop forgotten ones.
     */
    @Inject
    public StatementResource(
            QueryManager queryManager,
            AccessControl accessControl,
            SessionPropertyManager sessionPropertyManager,
            ExchangeClientSupplier exchangeClientSupplier,
            QueryIdGenerator queryIdGenerator)
    {
        this.queryManager = requireNonNull(queryManager, "queryManager is null");
        this.accessControl = requireNonNull(accessControl, "accessControl is null");
        this.sessionPropertyManager = requireNonNull(sessionPropertyManager, "sessionPropertyManager is null");
        this.exchangeClientSupplier = requireNonNull(exchangeClientSupplier, "exchangeClientSupplier is null");
        this.queryIdGenerator = requireNonNull(queryIdGenerator, "queryIdGenerator is null");

        // NOTE(review): scheduling from the constructor publishes `queries`
        // to the purger thread before construction finishes; the field is a
        // final ConcurrentHashMap so this looks safe, but worth confirming
        queryPurger.scheduleWithFixedDelay(new PurgeQueriesRunnable(queries, queryManager), 200, 200, MILLISECONDS);
    }
PurgeQueriesRunnable的run方法並不運行query,而是定期清理:對失敗的query釋放資源,對query manager已不再跟蹤的query從map中移除。PurgeQueriesRunnable是私有靜態內嵌類:
/**
 * Periodic sweeper over the resource's query map: disposes of failed
 * queries and forgets those the query manager no longer tracks.
 */
private static class PurgeQueriesRunnable
        implements Runnable
{
    private final ConcurrentMap<QueryId, Query> queries;
    private final QueryManager queryManager;

    public PurgeQueriesRunnable(ConcurrentMap<QueryId, Query> queries, QueryManager queryManager)
    {
        this.queries = queries;
        this.queryManager = queryManager;
    }

    @Override
    public void run()
    {
        try {
            // Queries are added to the query manager before being recorded in queryIds set.
            // Therefore, we take a snapshot of queryIds before getting the live queries
            // from the query manager. Then we remove only the queries in the snapshot and
            // not live queries set. If we did this in the other order, a query could be
            // registered between fetching the live queries and inspecting the queryIds set.
            for (QueryId queryId : ImmutableSet.copyOf(queries.keySet())) {
                Query query = queries.get(queryId);
                if (query == null) {
                    // entry was removed concurrently after the snapshot was taken;
                    // without this check a null here would NPE and abort the sweep
                    continue;
                }

                Optional<QueryState> state = queryManager.getQueryState(queryId);

                // free up resources if the query failed or is gone
                if (!state.isPresent() || state.get() == QueryState.FAILED) {
                    query.dispose();
                }

                // forget about this query if the query manager is no longer tracking it
                if (!state.isPresent()) {
                    queries.remove(queryId);
                }
            }
        }
        catch (Throwable e) {
            log.warn(e, "Error removing old queries");
        }
    }
}
進入QueryResource類的createQuery方法:
/**
 * REST endpoint: registers the submitted SQL with the query manager and
 * responds 201 Created with the new query's location and initial info.
 */
@POST
@Produces(MediaType.APPLICATION_JSON)
public Response createQuery(
        String statement,
        @Context HttpServletRequest servletRequest,
        @Context UriInfo uriInfo)
{
    assertRequest(!isNullOrEmpty(statement), "SQL statement is empty");

    Session session = createSessionForRequest(servletRequest, accessControl, sessionPropertyManager, queryIdGenerator.createNextQueryId());
    QueryInfo info = queryManager.createQuery(session, statement);

    // Location header points at the per-query resource
    URI location = uriBuilderFrom(uriInfo.getRequestUri()).appendPath(info.getQueryId().toString()).build();
    return Response.created(location).entity(info).build();
}
進入SqlQueryManager的createQuery方法:
/**
 * Parses the SQL, builds the matching QueryExecution via the per-statement
 * factory, registers it, and submits it to the queue manager for
 * asynchronous execution. Parse/analysis failures are converted into an
 * already-failed execution so the caller still receives a QueryInfo.
 */
@Override
public QueryInfo createQuery(Session session, String query)
{
    requireNonNull(query, "query is null");
    checkArgument(!query.isEmpty(), "query must not be empty string");

    QueryId queryId = session.getQueryId();

    QueryExecution queryExecution;
    Statement statement;
    try {
        statement = sqlParser.createStatement(query);
        // each statement type has its own execution factory
        QueryExecutionFactory<?> queryExecutionFactory = executionFactories.get(statement.getClass());
        if (queryExecutionFactory == null) {
            throw new PrestoException(NOT_SUPPORTED, "Unsupported statement type: " + statement.getClass().getSimpleName());
        }
        // EXPLAIN ANALYZE is only valid when the inner statement is a query
        if (statement instanceof Explain && ((Explain) statement).isAnalyze()) {
            Statement innerStatement = ((Explain) statement).getStatement();
            if (!(executionFactories.get(innerStatement.getClass()) instanceof SqlQueryExecutionFactory)) {
                throw new PrestoException(NOT_SUPPORTED, "EXPLAIN ANALYZE only supported for statements that are queries");
            }
        }
        queryExecution = queryExecutionFactory.createQueryExecution(queryId, query, session, statement);
    }
    catch (ParsingException | PrestoException e) {
        // This is intentionally not a method, since after the state change listener is registered
        // it's not safe to do any of this, and we had bugs before where people reused this code in a method
        URI self = locationFactory.createQueryLocation(queryId);
        QueryExecution execution = new FailedQueryExecution(queryId, query, session, self, transactionManager, queryExecutor, e);
        queries.put(queryId, execution);
        // fire created + completion immediately: the query is born failed
        QueryInfo queryInfo = execution.getQueryInfo();
        queryMonitor.createdEvent(queryInfo);
        queryMonitor.completionEvent(queryInfo);
        stats.queryFinished(queryInfo);
        expirationQueue.add(execution);
        return queryInfo;
    }

    QueryInfo queryInfo = queryExecution.getQueryInfo();
    queryMonitor.createdEvent(queryInfo);

    // on terminal state: record stats, fire completion, queue for expiration
    queryExecution.addStateChangeListener(newValue -> {
        if (newValue.isDone()) {
            QueryInfo info = queryExecution.getQueryInfo();
            stats.queryFinished(info);
            queryMonitor.completionEvent(info);
            expirationQueue.add(queryExecution);
        }
    });

    queries.put(queryId, queryExecution);

    // start the query in the background
    if (!queueManager.submit(statement, queryExecution, queryExecutor, stats)) {
        queryExecution.fail(new PrestoException(QUERY_QUEUE_FULL, "Too many queued queries!"));
    }

    return queryInfo;
}
創建QueryExecution並
調用QueryQueueManager的submit方法。在QueuedExecution中調用start方法,通過QueryExecution自身的start方法運行:
/**
 * Drives the query through its lifecycle: PLANNING (analyze + plan
 * distribution), then STARTING, then kicks off the distributed scheduler.
 * Transition order matters: each transition bails out if another thread
 * already moved the state machine past that point.
 */
@Override
public void start()
{
    try (SetThreadName ignored = new SetThreadName("Query-%s", stateMachine.getQueryId())) {
        try {
            // transition to planning
            if (!stateMachine.transitionToPlanning()) {
                // query already started or finished
                return;
            }

            // analyze query
            PlanRoot plan = analyzeQuery();

            // plan distribution of query
            planDistribution(plan);

            // transition to starting
            if (!stateMachine.transitionToStarting()) {
                // query already started or finished
                return;
            }

            // if query is not finished, start the scheduler, otherwise cancel it
            SqlQueryScheduler scheduler = queryScheduler.get();
            if (!stateMachine.isDone()) {
                scheduler.start();
            }
        }
        catch (Throwable e) {
            // any failure marks the query failed; Errors are rethrown
            fail(e);
            Throwables.propagateIfInstanceOf(e, Error.class);
        }
    }
}
PlanRoot plan = analyzeQuery(); 分析查詢得到plan
planDistribution(plan);創建調度器和步驟,準備分發計劃
在planDistribution方法裏面創建調度器:
// build the stage execution objects (this doesn't schedule execution)
// NOTE(review): excerpt from planDistribution — locals like
// outputStageExecutionPlan and plan come from the enclosing method
SqlQueryScheduler scheduler = new SqlQueryScheduler(
        stateMachine,
        locationFactory,
        outputStageExecutionPlan,
        nodePartitioningManager,
        nodeScheduler,
        remoteTaskFactory,
        stateMachine.getSession(),
        plan.isSummarizeTaskInfos(),
        scheduleSplitBatchSize,
        queryExecutor,
        ROOT_OUTPUT_BUFFERS,  // the root stage feeds results back to the coordinator
        nodeTaskMap,
        executionPolicy);
調度器構造函數裏創建stages:
// NOTE(review): excerpt from the SqlQueryScheduler constructor — builds one
// SqlStageExecution per plan fragment; partitioning maps are cached per handle
List<SqlStageExecution> stages = createStages(
        Optional.empty(),
        new AtomicInteger(),  // stage id sequence
        locationFactory,
        plan.withBucketToPartition(Optional.of(new int[1])),
        nodeScheduler,
        remoteTaskFactory,
        session,
        splitBatchSize,
        // lazily compute and cache the node partitioning per handle
        partitioningHandle -> partitioningCache.computeIfAbsent(partitioningHandle, handle -> nodePartitioningManager.getNodePartitioningMap(session, handle)),
        executor,
        nodeTaskMap,
        stageSchedulers,
        stageLinkages);
scheduler.start(); 分發plan成task到worker
worker上的task服務器接受到rest請求如下:
TaskResource類:
/**
 * Worker-side REST endpoint: creates the task on first call and updates its
 * sources/output buffers on subsequent calls, returning the current
 * TaskInfo (optionally summarized).
 */
@POST
@Path("{taskId}")
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
public Response createOrUpdateTask(@PathParam("taskId") TaskId taskId, TaskUpdateRequest taskUpdateRequest, @Context UriInfo uriInfo)
{
    requireNonNull(taskUpdateRequest, "taskUpdateRequest is null");

    Session session = taskUpdateRequest.getSession().toSession(sessionPropertyManager);
    TaskInfo info = taskManager.updateTask(session,
            taskId,
            taskUpdateRequest.getFragment(),
            taskUpdateRequest.getSources(),
            taskUpdateRequest.getOutputIds());

    // trim the payload when the caller asked for a summary
    TaskInfo result = shouldSummarize(uriInfo) ? info.summarize() : info;
    return Response.ok().entity(result).build();
}
/**
 * Validates the update request, applies resource-overcommit to the owning
 * query context when requested by the session, then delegates to the
 * (lazily created) SqlTask after recording a heartbeat.
 */
@Override
public TaskInfo updateTask(Session session, TaskId taskId, Optional<PlanFragment> fragment, List<TaskSource> sources, OutputBuffers outputBuffers)
{
    requireNonNull(session, "session is null");
    requireNonNull(taskId, "taskId is null");
    requireNonNull(fragment, "fragment is null");
    requireNonNull(sources, "sources is null");
    requireNonNull(outputBuffers, "outputBuffers is null");

    if (resourceOvercommit(session)) {
        // TODO: This should have been done when the QueryContext was created. However, the session isn't available at that point.
        queryContexts.getUnchecked(taskId.getQueryId()).setResourceOvercommit();
    }

    SqlTask task = tasks.getUnchecked(taskId);
    task.recordHeartbeat();
    return task.updateTask(session, fragment, sources, outputBuffers);
}
/**
 * Creates the task execution exactly once (first call carries the plan
 * fragment), then applies output-buffer and source updates. RuntimeExceptions
 * are recorded as task failure rather than propagated; Errors propagate.
 */
public TaskInfo updateTask(Session session, Optional<PlanFragment> fragment, List<TaskSource> sources, OutputBuffers outputBuffers)
{
    try {
        // assure the task execution is only created once
        SqlTaskExecution taskExecution;
        synchronized (this) {
            // is task already complete?
            TaskHolder taskHolder = taskHolderReference.get();
            if (taskHolder.isFinished()) {
                return taskHolder.getFinalTaskInfo();
            }
            taskExecution = taskHolder.getTaskExecution();
            if (taskExecution == null) {
                // first update must carry the plan fragment
                checkState(fragment.isPresent(), "fragment must be present");
                taskExecution = sqlTaskExecutionFactory.create(session, queryContext, taskStateMachine, sharedBuffer, fragment.get(), sources);
                taskHolderReference.compareAndSet(taskHolder, new TaskHolder(taskExecution));
                needsPlan.set(false);
            }
        }

        if (taskExecution != null) {
            // addSources checks for task completion, so update the buffers first and the task might complete earlier
            sharedBuffer.setOutputBuffers(outputBuffers);
            taskExecution.addSources(sources);
        }
    }
    catch (Error e) {
        failed(e);
        throw e;
    }
    catch (RuntimeException e) {
        // failure is recorded in task state; caller still gets TaskInfo below
        failed(e);
    }

    return getTaskInfo();
}
構造SqlTaskExecution:
/**
 * Creates the task execution exactly once (first call carries the plan
 * fragment), then applies output-buffer and source updates. RuntimeExceptions
 * are recorded as task failure rather than propagated; Errors propagate.
 */
public TaskInfo updateTask(Session session, Optional<PlanFragment> fragment, List<TaskSource> sources, OutputBuffers outputBuffers)
{
    try {
        // assure the task execution is only created once
        SqlTaskExecution taskExecution;
        synchronized (this) {
            // is task already complete?
            TaskHolder taskHolder = taskHolderReference.get();
            if (taskHolder.isFinished()) {
                return taskHolder.getFinalTaskInfo();
            }
            taskExecution = taskHolder.getTaskExecution();
            if (taskExecution == null) {
                // first update must carry the plan fragment
                checkState(fragment.isPresent(), "fragment must be present");
                taskExecution = sqlTaskExecutionFactory.create(session, queryContext, taskStateMachine, sharedBuffer, fragment.get(), sources);
                taskHolderReference.compareAndSet(taskHolder, new TaskHolder(taskExecution));
                needsPlan.set(false);
            }
        }

        if (taskExecution != null) {
            // addSources checks for task completion, so update the buffers first and the task might complete earlier
            sharedBuffer.setOutputBuffers(outputBuffers);
            taskExecution.addSources(sources);
        }
    }
    catch (Error e) {
        failed(e);
        throw e;
    }
    catch (RuntimeException e) {
        // failure is recorded in task state; caller still gets TaskInfo below
        failed(e);
    }

    return getTaskInfo();
}
/**
 * Builds the worker-local execution for one plan fragment: runs the local
 * execution planner to get driver factories, splits them into the (single)
 * partitioned-source factory and the unpartitioned ones, and registers the
 * task with the task executor unless planning already failed.
 */
private SqlTaskExecution(
        TaskStateMachine taskStateMachine,
        TaskContext taskContext,
        SharedBuffer sharedBuffer,
        PlanFragment fragment,
        LocalExecutionPlanner planner,
        TaskExecutor taskExecutor,
        QueryMonitor queryMonitor,
        Executor notificationExecutor)
{
    this.taskStateMachine = requireNonNull(taskStateMachine, "taskStateMachine is null");
    this.taskId = taskStateMachine.getTaskId();
    this.taskContext = requireNonNull(taskContext, "taskContext is null");
    this.sharedBuffer = requireNonNull(sharedBuffer, "sharedBuffer is null");
    this.taskExecutor = requireNonNull(taskExecutor, "driverExecutor is null");
    this.notificationExecutor = requireNonNull(notificationExecutor, "notificationExecutor is null");
    this.queryMonitor = requireNonNull(queryMonitor, "queryMonitor is null");

    try (SetThreadName ignored = new SetThreadName("Task-%s", taskId)) {
        List<DriverFactory> driverFactories;
        try {
            // compile the fragment into local driver factories
            LocalExecutionPlan localExecutionPlan = planner.plan(
                    taskContext.getSession(),
                    fragment.getRoot(),
                    fragment.getSymbols(),
                    fragment.getPartitionFunction(),
                    sharedBuffer,
                    fragment.getPartitioning().isSingleNode(),
                    fragment.getPartitionedSource() == null);
            driverFactories = localExecutionPlan.getDriverFactories();
        }
        catch (Throwable e) {
            // planning failed
            taskStateMachine.failed(e);
            throw Throwables.propagate(e);
        }

        // index driver factories
        // at most one factory may consume the fragment's partitioned source;
        // all others run unpartitioned
        DriverSplitRunnerFactory partitionedDriverFactory = null;
        ImmutableList.Builder<DriverSplitRunnerFactory> unpartitionedDriverFactories = ImmutableList.builder();
        for (DriverFactory driverFactory : driverFactories) {
            if (driverFactory.getSourceIds().contains(fragment.getPartitionedSource())) {
                checkState(partitionedDriverFactory == null, "multiple partitioned sources are not supported");
                partitionedDriverFactory = new DriverSplitRunnerFactory(driverFactory);
            }
            else {
                unpartitionedDriverFactories.add(new DriverSplitRunnerFactory(driverFactory));
            }
        }
        this.unpartitionedDriverFactories = unpartitionedDriverFactories.build();

        if (fragment.getPartitionedSource() != null) {
            checkArgument(partitionedDriverFactory != null, "Fragment is partitioned, but no partitioned driver found");
        }
        this.partitionedSourceId = fragment.getPartitionedSource();
        this.partitionedDriverFactory = partitionedDriverFactory;

        // don't register the task if it is already completed (most likely failed during planning above)
        if (!taskStateMachine.getState().isDone()) {
            taskHandle = taskExecutor.addTask(taskId, sharedBuffer::getUtilization, getInitialSplitsPerNode(taskContext.getSession()), getSplitConcurrencyAdjustmentInterval(taskContext.getSession()));
            taskStateMachine.addStateChangeListener(new RemoveTaskHandleWhenDone(taskExecutor, taskHandle));
            // close every driver factory once the task reaches a terminal state
            taskStateMachine.addStateChangeListener(state -> {
                if (state.isDone()) {
                    for (DriverFactory factory : driverFactories) {
                        factory.close();
                    }
                }
            });
        }
        else {
            taskHandle = null;
        }

        sharedBuffer.addStateChangeListener(new CheckTaskCompletionOnBufferFinish(SqlTaskExecution.this));
    }
}
在該構造函數中構建本地plan,構建多個driver執行各自的split
綜上,整體的流程如下:
獲取結果過程: