09_Flink Streaming execute

The job is started by calling env.execute(). StreamExecutionEnvironment has four implementation classes; LocalStreamEnvironment, for instance, simulates a Flink cluster with multiple threads inside the local JVM. All four implementations obtain the DAG object, a StreamGraph, via getStreamGraph().
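As a minimal sketch of that driver pattern (an illustrative example, not taken from the Flink sources; the operator contents are arbitrary): run standalone, getExecutionEnvironment() hands back a LocalStreamEnvironment, while the same code submitted through the CLI gets a cluster-backed environment.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ExecuteSketch {

	public static void main(String[] args) throws Exception {
		// Standalone: LocalStreamEnvironment (multi-threaded mini cluster in this JVM).
		// Via the CLI: a context environment that targets the real cluster.
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		env.fromElements(1, 2, 3)
			.map(new MapFunction<Integer, Integer>() {
				@Override
				public Integer map(Integer value) {
					return value * 2;
				}
			})
			.print();

		// Nothing has run so far; execute() builds the StreamGraph and submits the job.
		env.execute("execute-sketch");
	}
}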

The StreamGraph stores the node/edge relationships along with the context objects it needs, so it can reproduce the semantics of the API program. From it a JobGraph object is derived, which is what gets submitted to the cluster. The submission is carried out by Client, JobClient and JobClientMessages, which use Akka's Patterns to communicate and start the job.

Every DataStream operation is ultimately converted through the transform interface, producing a StreamTransformation and an operator. transform calls getExecutionEnvironment().addOperator(resultTransform), so the resulting transformations are gradually collected into the environment's List<StreamTransformation<?>> transformations.
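A stripped-down model of that bookkeeping, using made-up Mini* class names purely for illustration (the real classes are DataStream, StreamTransformation and StreamExecutionEnvironment):

import java.util.ArrayList;
import java.util.List;

// Illustrative stand-ins, not the real Flink classes: they only show how every
// API call funnels through transform(), which registers the result with the env.
class MiniTransformation {
	final String name;
	MiniTransformation(String name) { this.name = name; }
}

class MiniEnvironment {
	// corresponds to List<StreamTransformation<?>> transformations in the env
	final List<MiniTransformation> transformations = new ArrayList<>();

	void addOperator(MiniTransformation resultTransform) {
		transformations.add(resultTransform);
	}
}

class MiniDataStream {
	private final MiniEnvironment env;
	MiniDataStream(MiniEnvironment env) { this.env = env; }

	// map(), filter(), ... would all delegate to this
	MiniDataStream transform(String operatorName) {
		MiniTransformation result = new MiniTransformation(operatorName);
		env.addOperator(result);   // collected for the StreamGraphGenerator later
		return new MiniDataStream(env);
	}
}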

StreamGraphGenerator.generate(this, transformations); — in the end StreamGraphGenerator builds the StreamGraph DAG object from the environment and its transformations: its transform step walks the transformations recorded in the env and incrementally draws out the StreamGraph DAG.

The StreamGraph represents the execution logic of the code; the JobGraph it yields is the DAG that tasks actually run against. createJobGraph converts the logical plan into the job plan.

Conclusion: execute() turns the program's logic into a StreamGraph object, which is then converted into a JobGraph and submitted to the cluster.
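Put together, the flow described above can be sketched roughly like this (method names follow the Flink 1.x sources discussed here; the submission call at the end is a placeholder, since the exact client API differs between environments and versions):

// Conceptual shape of execute(jobName) in a cluster-facing environment; simplified,
// error handling, class loading and configuration omitted.
public JobExecutionResult execute(String jobName) throws Exception {
	// 1. Build the logical DAG from the recorded transformations
	//    (internally StreamGraphGenerator.generate(this, transformations)).
	StreamGraph streamGraph = getStreamGraph();
	streamGraph.setJobName(jobName);

	// 2. Translate the logical plan into the task-level plan
	//    (internally createJobGraph()).
	JobGraph jobGraph = streamGraph.getJobGraph();

	// 3. Hand the JobGraph to the client, which talks to the JobManager via Akka.
	//    submitJobAndWait(...) is a placeholder for the environment-specific call.
	return client.submitJobAndWait(jobGraph);
}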



/**
	 * Creates an execution environment that represents the context in which the
	 * program is currently executed. If the program is invoked standalone, this
	 * method returns a local execution environment, as returned by
	 * {@link #createLocalEnvironment()}.
	 *
	 * @return The execution environment of the context in which the program is
	 * executed.
	 */
	public static StreamExecutionEnvironment getExecutionEnvironment() {
		if (contextEnvironmentFactory != null) {
			return contextEnvironmentFactory.createExecutionEnvironment();
		}

		// because the streaming project depends on "flink-clients" (and not the other way around)
		// we currently need to intercept the data set environment and create a dependent stream env.
		// this should be fixed once we rework the project dependencies
		
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
		if (env instanceof ContextEnvironment) {
			return new StreamContextEnvironment((ContextEnvironment) env);
		} else if (env instanceof OptimizerPlanEnvironment || env instanceof PreviewPlanEnvironment) {
			return new StreamPlanEnvironment(env);
		} else {
			return createLocalEnvironment();
		}
	}
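The snippet above shows how the streaming factory wraps the batch ExecutionEnvironment, whose full source follows. For comparison, a minimal standalone batch program goes through the same kind of factory call; here getExecutionEnvironment() yields a LocalEnvironment, and the eager sink print() triggers the execution (a small usage sketch, not part of the source below):

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;

public class BatchSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		DataSet<Long> evens = env.generateSequence(1, 100)
			.filter(new FilterFunction<Long>() {
				@Override
				public boolean filter(Long value) {
					return value % 2 == 0;
				}
			});

		// print() is an eager sink here: it collects the result and runs the plan itself,
		// so no separate env.execute() call is needed afterwards.
		evens.print();
	}
}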

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java;

import com.esotericsoftware.kryo.Serializer;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.cache.DistributedCache.DistributedCacheEntry;
import org.apache.flink.api.common.io.FileInputFormat;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.operators.OperatorInformation;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.hadoop.mapred.HadoopInputFormat;
import org.apache.flink.api.java.io.CollectionInputFormat;
import org.apache.flink.api.java.io.CsvReader;
import org.apache.flink.api.java.io.IteratorInputFormat;
import org.apache.flink.api.java.io.ParallelIteratorInputFormat;
import org.apache.flink.api.java.io.PrimitiveInputFormat;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.io.TextValueInputFormat;
import org.apache.flink.api.java.operators.DataSink;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.Operator;
import org.apache.flink.api.java.operators.OperatorTranslation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.PojoTypeInfo;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.api.java.typeutils.ValueTypeInfo;
import org.apache.flink.api.java.typeutils.runtime.kryo.Serializers;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.types.StringValue;
import org.apache.flink.util.NumberSequenceIterator;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.SplittableIterator;
import org.apache.flink.util.Visitor;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

/**
 * The ExecutionEnvironment is the context in which a program is executed. A
 * {@link LocalEnvironment} will cause execution in the current JVM, a
 * {@link RemoteEnvironment} will cause execution on a remote setup.
 * <p>
 * The environment provides methods to control the job execution (such as setting the parallelism)
 * and to interact with the outside world (data access).
 * <p>
 * Please note that the execution environment needs strong type information for the input and return types
 * of all operations that are executed. This means that the environment needs to know that the return
 * value of an operation is for example a Tuple of String and Integer.
 * Because the Java compiler throws much of the generic type information away, most methods attempt to re-
 * obtain that information using reflection. In certain cases, it may be necessary to manually supply that
 * information to some of the methods.
 *
 * @see LocalEnvironment
 * @see RemoteEnvironment
 */
@Public
public abstract class ExecutionEnvironment {

	/** The logger used by the environment and its subclasses */
	protected static final Logger LOG = LoggerFactory.getLogger(ExecutionEnvironment.class);

	/** The environment of the context (local by default, cluster if invoked through command line) */
	private static ExecutionEnvironmentFactory contextEnvironmentFactory;

	/** The default parallelism used by local environments */
	private static int defaultLocalDop = Runtime.getRuntime().availableProcessors();

	// --------------------------------------------------------------------------------------------

	private final List<DataSink<?>> sinks = new ArrayList<>();

	private final List<Tuple2<String, DistributedCacheEntry>> cacheFile = new ArrayList<>();

	private final ExecutionConfig config = new ExecutionConfig();

	/** Result from the latest execution, to make it retrievable when using eager execution methods */
	protected JobExecutionResult lastJobExecutionResult;

	/** The ID of the session, defined by this execution environment. Sessions and Jobs are the same in
	 *  Flink, as Jobs can consist of multiple parts that are attached to the growing dataflow graph */
	protected JobID jobID;

	/** The session timeout in seconds */
	protected long sessionTimeout;

	/** Flag to indicate whether sinks have been cleared in previous executions */
	private boolean wasExecuted = false;


	/**
	 * Creates a new Execution Environment.
	 */
	protected ExecutionEnvironment() {
		jobID = JobID.generate();
	}

	// --------------------------------------------------------------------------------------------
	//  Properties
	// --------------------------------------------------------------------------------------------

	/**
	 * Gets the config object that defines execution parameters.
	 *
	 * @return The environment's execution configuration.
	 */
	public ExecutionConfig getConfig() {
		return config;
	}

	/**
	 * Gets the parallelism with which operations are executed by default. Operations can
	 * individually override this value to use a specific parallelism via
	 * {@link Operator#setParallelism(int)}. Other operations may need to run with a different
	 * parallelism - for example calling
	 * {@link DataSet#reduce(org.apache.flink.api.common.functions.ReduceFunction)} over the entire
	 * set will eventually insert an operation that runs non-parallel (parallelism of one).
	 *
	 * @return The parallelism used by operations, unless they override that value. This method
	 *         returns {@link ExecutionConfig#PARALLELISM_DEFAULT}, if the environment's default parallelism should be used.
	 */
	public int getParallelism() {
		return config.getParallelism();
	}

	/**
	 * Sets the parallelism for operations executed through this environment.
	 * Setting a parallelism of x here will cause all operators (such as join, map, reduce) to run with
	 * x parallel instances.
	 * <p>
	 * This method overrides the default parallelism for this environment.
	 * The {@link LocalEnvironment} uses by default a value equal to the number of hardware
	 * contexts (CPU cores / threads). When executing the program via the command line client
	 * from a JAR file, the default parallelism is the one configured for that setup.
	 *
	 * @param parallelism The parallelism
	 */
	public void setParallelism(int parallelism) {
		config.setParallelism(parallelism);
	}

	/**
	 * Sets the restart strategy configuration. The configuration specifies which restart strategy
	 * will be used for the execution graph in case of a restart.
	 *
	 * @param restartStrategyConfiguration Restart strategy configuration to be set
	 */
	@PublicEvolving
	public void setRestartStrategy(RestartStrategies.RestartStrategyConfiguration restartStrategyConfiguration) {
		config.setRestartStrategy(restartStrategyConfiguration);
	}

	/**
	 * Returns the specified restart strategy configuration.
	 *
	 * @return The restart strategy configuration to be used
	 */
	@PublicEvolving
	public RestartStrategies.RestartStrategyConfiguration getRestartStrategy() {
		return config.getRestartStrategy();
	}

	/**
	 * Sets the number of times that failed tasks are re-executed. A value of zero
	 * effectively disables fault tolerance. A value of {@code -1} indicates that the system
	 * default value (as defined in the configuration) should be used.
	 *
	 * @param numberOfExecutionRetries The number of times the system will try to re-execute failed tasks.
	 *
	 * @deprecated This method will be replaced by {@link #setRestartStrategy}. The
	 * {@link RestartStrategies.FixedDelayRestartStrategyConfiguration} contains the number of
	 * execution retries.
	 */
	@Deprecated
	@PublicEvolving
	public void setNumberOfExecutionRetries(int numberOfExecutionRetries) {
		config.setNumberOfExecutionRetries(numberOfExecutionRetries);
	}

	/**
	 * Gets the number of times the system will try to re-execute failed tasks. A value
	 * of {@code -1} indicates that the system default value (as defined in the configuration)
	 * should be used.
	 *
	 * @return The number of times the system will try to re-execute failed tasks.
	 *
	 * @deprecated This method will be replaced by {@link #getRestartStrategy}. The
	 * {@link RestartStrategies.FixedDelayRestartStrategyConfiguration} contains the number of
	 * execution retries.
	 */
	@Deprecated
	@PublicEvolving
	public int getNumberOfExecutionRetries() {
		return config.getNumberOfExecutionRetries();
	}

	/**
	 * Returns the {@link org.apache.flink.api.common.JobExecutionResult} of the last executed job.
	 *
	 * @return The execution result from the latest job execution.
	 */
	public JobExecutionResult getLastJobExecutionResult(){
		return this.lastJobExecutionResult;
	}

	// --------------------------------------------------------------------------------------------
	//  Session Management
	// --------------------------------------------------------------------------------------------

	/**
	 * Gets the JobID by which this environment is identified. The JobID sets the execution context
	 * in the cluster or local environment.
	 *
	 * @return The JobID of this environment.
	 * @see #getIdString()
	 */
	@PublicEvolving
	public JobID getId() {
		return this.jobID;
	}

	/**
	 * Gets the JobID by which this environment is identified, as a string.
	 *
	 * @return The JobID as a string.
	 * @see #getId()
	 */
	@PublicEvolving
	public String getIdString() {
		return this.jobID.toString();
	}

	/**
	 * Sets the session timeout to hold the intermediate results of a job. This only
	 * applies the updated timeout in future executions.
	 *
	 * @param timeout The timeout, in seconds.
	 */
	@PublicEvolving
	public void setSessionTimeout(long timeout) {
		throw new IllegalStateException("Support for sessions is currently disabled. " +
				"It will be enabled in future Flink versions.");
		// Session management is disabled, revert this commit to enable
		//if (timeout < 0) {
		//	throw new IllegalArgumentException("The session timeout must not be less than zero.");
		//}
		//this.sessionTimeout = timeout;
	}

	/**
	 * Gets the session timeout for this environment. The session timeout defines for how long
	 * after an execution, the job and its intermediate results will be kept for future
	 * interactions.
	 *
	 * @return The session timeout, in seconds.
	 */
	@PublicEvolving
	public long getSessionTimeout() {
		return sessionTimeout;
	}

	/**
	 * Starts a new session, discarding the previous data flow and all of its intermediate results.
	 */
	@PublicEvolving
	public abstract void startNewSession() throws Exception;

	// --------------------------------------------------------------------------------------------
	//  Registry for types and serializers
	// --------------------------------------------------------------------------------------------

	/**
	 * Adds a new Kryo default serializer to the Runtime.
	 *
	 * Note that the serializer instance must be serializable (as defined by java.io.Serializable),
	 * because it may be distributed to the worker nodes by java serialization.
	 *
	 * @param type The class of the types serialized with the given serializer.
	 * @param serializer The serializer to use.
	 */
	public <T extends Serializer<?> & Serializable>void addDefaultKryoSerializer(Class<?> type, T serializer) {
		config.addDefaultKryoSerializer(type, serializer);
	}

	/**
	 * Adds a new Kryo default serializer to the Runtime.
	 *
	 * @param type The class of the types serialized with the given serializer.
	 * @param serializerClass The class of the serializer to use.
	 */
	public void addDefaultKryoSerializer(Class<?> type, Class<? extends Serializer<?>> serializerClass) {
		config.addDefaultKryoSerializer(type, serializerClass);
	}

	/**
	 * Registers the given type with a Kryo Serializer.
	 *
	 * Note that the serializer instance must be serializable (as defined by java.io.Serializable),
	 * because it may be distributed to the worker nodes by java serialization.
	 *
	 * @param type The class of the types serialized with the given serializer.
	 * @param serializer The serializer to use.
	 */
	public <T extends Serializer<?> & Serializable>void registerTypeWithKryoSerializer(Class<?> type, T serializer) {
		config.registerTypeWithKryoSerializer(type, serializer);
	}

	/**
	 * Registers the given Serializer via its class as a serializer for the given type at the KryoSerializer
	 *
	 * @param type The class of the types serialized with the given serializer.
	 * @param serializerClass The class of the serializer to use.
	 */
	public void registerTypeWithKryoSerializer(Class<?> type, Class<? extends Serializer<?>> serializerClass) {
		config.registerTypeWithKryoSerializer(type, serializerClass);
	}

	/**
	 * Registers the given type with the serialization stack. If the type is eventually
	 * serialized as a POJO, then the type is registered with the POJO serializer. If the
	 * type ends up being serialized with Kryo, then it will be registered at Kryo to make
	 * sure that only tags are written.
	 *
	 * @param type The class of the type to register.
	 */
	public void registerType(Class<?> type) {
		if (type == null) {
			throw new NullPointerException("Cannot register null type class.");
		}

		TypeInformation<?> typeInfo = TypeExtractor.createTypeInfo(type);

		if (typeInfo instanceof PojoTypeInfo) {
			config.registerPojoType(type);
		} else {
			config.registerKryoType(type);
		}
	}

	// --------------------------------------------------------------------------------------------
	//  Data set creations
	// --------------------------------------------------------------------------------------------

	// ---------------------------------- Text Input Format ---------------------------------------

	/**
	 * Creates a {@link DataSet} that represents the Strings produced by reading the given file line wise.
	 * The file will be read with the system's default character set.
	 *
	 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
	 * @return A {@link DataSet} that represents the data read from the given file as text lines.
	 */
	public DataSource<String> readTextFile(String filePath) {
		Preconditions.checkNotNull(filePath, "The file path may not be null.");

		return new DataSource<>(this, new TextInputFormat(new Path(filePath)), BasicTypeInfo.STRING_TYPE_INFO, Utils.getCallLocationName());
	}

	/**
	 * Creates a {@link DataSet} that represents the Strings produced by reading the given file line wise.
	 * The {@link java.nio.charset.Charset} with the given name will be used to read the files.
	 *
	 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
	 * @param charsetName The name of the character set used to read the file.
	 * @return A {@link DataSet} that represents the data read from the given file as text lines.
	 */
	public DataSource<String> readTextFile(String filePath, String charsetName) {
		Preconditions.checkNotNull(filePath, "The file path may not be null.");

		TextInputFormat format = new TextInputFormat(new Path(filePath));
		format.setCharsetName(charsetName);
		return new DataSource<>(this, format, BasicTypeInfo.STRING_TYPE_INFO, Utils.getCallLocationName());
	}

	// -------------------------- Text Input Format With String Value------------------------------

	/**
	 * Creates a {@link DataSet} that represents the Strings produced by reading the given file line wise.
	 * This method is similar to {@link #readTextFile(String)}, but it produces a DataSet with mutable
	 * {@link StringValue} objects, rather than Java Strings. StringValues can be used to tune implementations
	 * to be less object and garbage collection heavy.
	 * <p>
	 * The file will be read with the system's default character set.
	 *
	 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
	 * @return A {@link DataSet} that represents the data read from the given file as text lines.
	 */
	public DataSource<StringValue> readTextFileWithValue(String filePath) {
		Preconditions.checkNotNull(filePath, "The file path may not be null.");

		return new DataSource<>(this, new TextValueInputFormat(new Path(filePath)), new ValueTypeInfo<>(StringValue.class), Utils.getCallLocationName());
	}

	/**
	 * Creates a {@link DataSet} that represents the Strings produced by reading the given file line wise.
	 * This method is similar to {@link #readTextFile(String, String)}, but it produces a DataSet with mutable
	 * {@link StringValue} objects, rather than Java Strings. StringValues can be used to tune implementations
	 * to be less object and garbage collection heavy.
	 * <p>
	 * The {@link java.nio.charset.Charset} with the given name will be used to read the files.
	 *
	 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
	 * @param charsetName The name of the character set used to read the file.
	 * @param skipInvalidLines A flag to indicate whether to skip lines that cannot be read with the given character set.
	 *
	 * @return A DataSet that represents the data read from the given file as text lines.
	 */
	public DataSource<StringValue> readTextFileWithValue(String filePath, String charsetName, boolean skipInvalidLines) {
		Preconditions.checkNotNull(filePath, "The file path may not be null.");

		TextValueInputFormat format = new TextValueInputFormat(new Path(filePath));
		format.setCharsetName(charsetName);
		format.setSkipInvalidLines(skipInvalidLines);
		return new DataSource<>(this, format, new ValueTypeInfo<>(StringValue.class), Utils.getCallLocationName());
	}

	// ----------------------------------- Primitive Input Format ---------------------------------------

	/**
	 * Creates a {@link DataSet} that represents the primitive type produced by reading the given file line wise.
	 * This method is similar to {@link #readCsvFile(String)} with a single field, but it produces the DataSet
	 * directly as the primitive type, rather than wrapped in {@link org.apache.flink.api.java.tuple.Tuple1}.
	 *
	 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
	 * @param typeClass The primitive type class to be read.
	 * @return A {@link DataSet} that represents the data read from the given file as primitive type.
	 */
	public <X> DataSource<X> readFileOfPrimitives(String filePath, Class<X> typeClass) {
		Preconditions.checkNotNull(filePath, "The file path may not be null.");

		return new DataSource<>(this, new PrimitiveInputFormat<>(new Path(filePath), typeClass), TypeExtractor.getForClass(typeClass), Utils.getCallLocationName());
	}

	/**
	 * Creates a {@link DataSet} that represents the primitive type produced by reading the given file in a delimited way.
	 * This method is similar to {@link #readCsvFile(String)} with a single field, but it produces the DataSet
	 * directly as the primitive type, rather than wrapped in {@link org.apache.flink.api.java.tuple.Tuple1}.
	 *
	 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
	 * @param delimiter The delimiter of the given file.
	 * @param typeClass The primitive type class to be read.
	 * @return A {@link DataSet} that represents the data read from the given file as primitive type.
	 */
	public <X> DataSource<X> readFileOfPrimitives(String filePath, String delimiter, Class<X> typeClass) {
		Preconditions.checkNotNull(filePath, "The file path may not be null.");

		return new DataSource<>(this, new PrimitiveInputFormat<>(new Path(filePath), delimiter, typeClass), TypeExtractor.getForClass(typeClass), Utils.getCallLocationName());
	}

	// ----------------------------------- CSV Input Format ---------------------------------------

	/**
	 * Creates a CSV reader to read a comma separated value (CSV) file. The reader has options to
	 * define parameters and field types and will eventually produce the DataSet that corresponds to
	 * the read and parsed CSV input.
	 *
	 * @param filePath The path of the CSV file.
	 * @return A CsvReader that can be used to configure the CSV input.
	 */
	public CsvReader readCsvFile(String filePath) {
		return new CsvReader(filePath, this);
	}

	// ------------------------------------ File Input Format -----------------------------------------

	public <X> DataSource<X> readFile(FileInputFormat<X> inputFormat, String filePath) {
		if (inputFormat == null) {
			throw new IllegalArgumentException("InputFormat must not be null.");
		}
		if (filePath == null) {
			throw new IllegalArgumentException("The file path must not be null.");
		}

		inputFormat.setFilePath(new Path(filePath));
		try {
			return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
		}
		catch (Exception e) {
			throw new InvalidProgramException("The type returned by the input format could not be automatically determined. " +
					"Please specify the TypeInformation of the produced type explicitly by using the " +
					"'createInput(InputFormat, TypeInformation)' method instead.");
		}
	}

	// ----------------------------------- Generic Input Format ---------------------------------------

	/**
	 * Generic method to create an input {@link DataSet} from an {@link InputFormat}. The DataSet will not be
	 * immediately created - instead, this method returns a DataSet that will be lazily created from
	 * the input format once the program is executed.
	 * <p>
	 * Since all data sets need specific information about their types, this method needs to determine
	 * the type of the data produced by the input format. It will attempt to determine the data type
	 * by reflection, unless the input format implements the {@link ResultTypeQueryable} interface.
	 * In the latter case, this method will invoke the {@link ResultTypeQueryable#getProducedType()}
	 * method to determine data type produced by the input format.
	 *
	 * @param inputFormat The input format used to create the data set.
	 * @return A {@link DataSet} that represents the data created by the input format.
	 *
	 * @see #createInput(InputFormat, TypeInformation)
	 */
	public <X> DataSource<X> createInput(InputFormat<X, ?> inputFormat) {
		if (inputFormat == null) {
			throw new IllegalArgumentException("InputFormat must not be null.");
		}

		try {
			return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
		}
		catch (Exception e) {
			throw new InvalidProgramException("The type returned by the input format could not be automatically determined. " +
					"Please specify the TypeInformation of the produced type explicitly by using the " +
					"'createInput(InputFormat, TypeInformation)' method instead.", e);
		}
	}

	/**
	 * Generic method to create an input DataSet from an {@link InputFormat}. The {@link DataSet} will not be
	 * immediately created - instead, this method returns a {@link DataSet} that will be lazily created from
	 * the input format once the program is executed.
	 * <p>
	 * The {@link DataSet} is typed to the given TypeInformation. This method is intended for input formats
	 * where the return type cannot be determined by reflection analysis, and that do not implement the
	 * {@link ResultTypeQueryable} interface.
	 *
	 * @param inputFormat The input format used to create the data set.
	 * @return A {@link DataSet} that represents the data created by the input format.
	 *
	 * @see #createInput(InputFormat)
	 */
	public <X> DataSource<X> createInput(InputFormat<X, ?> inputFormat, TypeInformation<X> producedType) {
		if (inputFormat == null) {
			throw new IllegalArgumentException("InputFormat must not be null.");
		}

		if (producedType == null) {
			throw new IllegalArgumentException("Produced type information must not be null.");
		}

		return new DataSource<>(this, inputFormat, producedType, Utils.getCallLocationName());
	}

	// ----------------------------------- Hadoop Input Format ---------------------------------------

	/**
	 * Creates a {@link DataSet} from the given {@link org.apache.hadoop.mapred.FileInputFormat}. The
	 * given inputName is set on the given job.
	 */
	@PublicEvolving
	public <K,V> DataSource<Tuple2<K, V>> readHadoopFile(org.apache.hadoop.mapred.FileInputFormat<K,V> mapredInputFormat, Class<K> key, Class<V> value, String inputPath, JobConf job) {
		DataSource<Tuple2<K, V>> result = createHadoopInput(mapredInputFormat, key, value, job);

		org.apache.hadoop.mapred.FileInputFormat.addInputPath(job, new org.apache.hadoop.fs.Path(inputPath));

		return result;
	}

	/**
	 * Creates a {@link DataSet} from {@link org.apache.hadoop.mapred.SequenceFileInputFormat}
	 * A {@link org.apache.hadoop.mapred.JobConf} with the given inputPath is created.
 	 */
	@PublicEvolving
	public <K,V> DataSource<Tuple2<K, V>> readSequenceFile(Class<K> key, Class<V> value, String inputPath) throws IOException {
		return readHadoopFile(new org.apache.hadoop.mapred.SequenceFileInputFormat<K, V>(), key, value, inputPath);
	}

	/**
	 * Creates a {@link DataSet} from the given {@link org.apache.hadoop.mapred.FileInputFormat}. A
	 * {@link org.apache.hadoop.mapred.JobConf} with the given inputPath is created.
	 */
	@PublicEvolving
	public <K,V> DataSource<Tuple2<K, V>> readHadoopFile(org.apache.hadoop.mapred.FileInputFormat<K,V> mapredInputFormat, Class<K> key, Class<V> value, String inputPath) {
		return readHadoopFile(mapredInputFormat, key, value, inputPath, new JobConf());
	}

	/**
	 * Creates a {@link DataSet} from the given {@link org.apache.hadoop.mapred.InputFormat}.
	 */
	@PublicEvolving
	public <K,V> DataSource<Tuple2<K, V>> createHadoopInput(org.apache.hadoop.mapred.InputFormat<K,V> mapredInputFormat, Class<K> key, Class<V> value, JobConf job) {
		HadoopInputFormat<K, V> hadoopInputFormat = new HadoopInputFormat<>(mapredInputFormat, key, value, job);

		return this.createInput(hadoopInputFormat);
	}

	/**
	 * Creates a {@link DataSet} from the given {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}. The
	 * given inputName is set on the given job.
	 */
	@PublicEvolving
	public <K,V> DataSource<Tuple2<K, V>> readHadoopFile(org.apache.hadoop.mapreduce.lib.input.FileInputFormat<K,V> mapreduceInputFormat, Class<K> key, Class<V> value, String inputPath, Job job) throws IOException {
		DataSource<Tuple2<K, V>> result = createHadoopInput(mapreduceInputFormat, key, value, job);

		org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new org.apache
				.hadoop.fs.Path(inputPath));

		return result;
	}

	/**
	 * Creates a {@link DataSet} from the given {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}. A
	 * {@link org.apache.hadoop.mapreduce.Job} with the given inputPath is created.
	 */
	@PublicEvolving
	public <K,V> DataSource<Tuple2<K, V>> readHadoopFile(org.apache.hadoop.mapreduce.lib.input.FileInputFormat<K,V> mapreduceInputFormat, Class<K> key, Class<V> value, String inputPath) throws IOException {
		return readHadoopFile(mapreduceInputFormat, key, value, inputPath, Job.getInstance());
	}

	/**
	 * Creates a {@link DataSet} from the given {@link org.apache.hadoop.mapreduce.InputFormat}.
	 */
	@PublicEvolving
	public <K,V> DataSource<Tuple2<K, V>> createHadoopInput(org.apache.hadoop.mapreduce.InputFormat<K,V> mapreduceInputFormat, Class<K> key, Class<V> value, Job job) {
		org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat<K, V> hadoopInputFormat = new org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat<>(mapreduceInputFormat, key, value, job);

		return this.createInput(hadoopInputFormat);
	}
	
	// ----------------------------------- Collection ---------------------------------------
	
	/**
	 * Creates a DataSet from the given non-empty collection. The type of the data set is that
	 * of the elements in the collection.
	 * <p>
	 * The framework will try and determine the exact type from the collection elements.
	 * In case of generic elements, it may be necessary to manually supply the type information
	 * via {@link #fromCollection(Collection, TypeInformation)}.
	 * <p>
	 * Note that this operation will result in a non-parallel data source, i.e. a data source with
	 * a parallelism of one.
	 * 
	 * @param data The collection of elements to create the data set from.
	 * @return A DataSet representing the given collection.
	 * 
	 * @see #fromCollection(Collection, TypeInformation)
	 */
	public <X> DataSource<X> fromCollection(Collection<X> data) {
		if (data == null) {
			throw new IllegalArgumentException("The data must not be null.");
		}
		if (data.size() == 0) {
			throw new IllegalArgumentException("The size of the collection must not be empty.");
		}
		
		X firstValue = data.iterator().next();
		
		TypeInformation<X> type = TypeExtractor.getForObject(firstValue);
		CollectionInputFormat.checkCollection(data, type.getTypeClass());
		return new DataSource<>(this, new CollectionInputFormat<>(data, type.createSerializer(config)), type, Utils.getCallLocationName());
	}
	
	/**
	 * Creates a DataSet from the given non-empty collection. Note that this operation will result
	 * in a non-parallel data source, i.e. a data source with a parallelism of one.
	 * <p>
	 * The returned DataSet is typed to the given TypeInformation.
	 *  
	 * @param data The collection of elements to create the data set from.
	 * @param type The TypeInformation for the produced data set.
	 * @return A DataSet representing the given collection.
	 * 
	 * @see #fromCollection(Collection)
	 */
	public <X> DataSource<X> fromCollection(Collection<X> data, TypeInformation<X> type) {
		return fromCollection(data, type, Utils.getCallLocationName());
	}
	
	private <X> DataSource<X> fromCollection(Collection<X> data, TypeInformation<X> type, String callLocationName) {
		CollectionInputFormat.checkCollection(data, type.getTypeClass());
		return new DataSource<>(this, new CollectionInputFormat<>(data, type.createSerializer(config)), type, callLocationName);
	}
	
	/**
	 * Creates a DataSet from the given iterator. Because the iterator will remain unmodified until
	 * the actual execution happens, the type of data returned by the iterator must be given
	 * explicitly in the form of the type class (this is due to the fact that the Java compiler
	 * erases the generic type information).
	 * <p>
	 * Note that this operation will result in a non-parallel data source, i.e. a data source with
	 * a parallelism of one.
	 * 
	 * @param data The collection of elements to create the data set from.
	 * @param type The class of the data produced by the iterator. Must not be a generic class.
	 * @return A DataSet representing the elements in the iterator.
	 * 
	 * @see #fromCollection(Iterator, TypeInformation)
	 */
	public <X> DataSource<X> fromCollection(Iterator<X> data, Class<X> type) {
		return fromCollection(data, TypeExtractor.getForClass(type));
	}
	
	/**
	 * Creates a DataSet from the given iterator. Because the iterator will remain unmodified until
	 * the actual execution happens, the type of data returned by the iterator must be given
	 * explicitly in the form of the type information. This method is useful for cases where the type
	 * is generic. In that case, the type class (as given in {@link #fromCollection(Iterator, Class)}
	 * does not supply all type information.
	 * <p>
	 * Note that this operation will result in a non-parallel data source, i.e. a data source with
	 * a parallelism of one.
	 * 
	 * @param data The collection of elements to create the data set from.
	 * @param type The TypeInformation for the produced data set.
	 * @return A DataSet representing the elements in the iterator.
	 * 
	 * @see #fromCollection(Iterator, Class)
	 */
	public <X> DataSource<X> fromCollection(Iterator<X> data, TypeInformation<X> type) {
		return new DataSource<>(this, new IteratorInputFormat<>(data), type, Utils.getCallLocationName());
	}
	
	
	/**
	 * Creates a new data set that contains the given elements. The elements must all be of the same type,
	 * for example, all {@link String} or all {@link Integer}. The sequence of elements must not be empty.
	 * <p>
	 * The framework will try and determine the exact type from the collection elements.
	 * In case of generic elements, it may be necessary to manually supply the type information
	 * via {@link #fromCollection(Collection, TypeInformation)}.
	 * <p>
	 * Note that this operation will result in a non-parallel data source, i.e. a data source with
	 * a parallelism of one.
	 * 
	 * @param data The elements to make up the data set.
	 * @return A DataSet representing the given list of elements.
	 */
	@SafeVarargs
	public final <X> DataSource<X> fromElements(X... data) {
		if (data == null) {
			throw new IllegalArgumentException("The data must not be null.");
		}
		if (data.length == 0) {
			throw new IllegalArgumentException("The number of elements must not be zero.");
		}
		
		TypeInformation<X> typeInfo;
		try {
			typeInfo = TypeExtractor.getForObject(data[0]);
		}
		catch (Exception e) {
			throw new RuntimeException("Could not create TypeInformation for type " + data[0].getClass().getName()
					+ "; please specify the TypeInformation manually via "
					+ "ExecutionEnvironment#fromElements(Collection, TypeInformation)");
		}

		return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
	}
	
	/**
	 * Creates a new data set that contains the given elements. The framework will determine the type according to the
	 * base type supplied by the user. The elements must be of that base type or a subclass of it.
	 * The sequence of elements must not be empty.
	 * Note that this operation will result in a non-parallel data source, i.e. a data source with
	 * a parallelism of one.
	 *
	 * @param type The base class type for every element in the collection.
	 * @param data The elements to make up the data set.
	 * @return A DataSet representing the given list of elements.
	 */
	@SafeVarargs
	public final <X> DataSource<X> fromElements(Class<X> type, X... data) {
		if (data == null) {
			throw new IllegalArgumentException("The data must not be null.");
		}
		if (data.length == 0) {
			throw new IllegalArgumentException("The number of elements must not be zero.");
		}
		
		TypeInformation<X> typeInfo;
		try {
			typeInfo = TypeExtractor.getForClass(type);
		}
		catch (Exception e) {
			throw new RuntimeException("Could not create TypeInformation for type " + type.getName()
					+ "; please specify the TypeInformation manually via "
					+ "ExecutionEnvironment#fromElements(Collection, TypeInformation)");
		}

		return fromCollection(Arrays.asList(data), typeInfo, Utils.getCallLocationName());
	}
	
	
	/**
	 * Creates a new data set that contains elements in the iterator. The iterator is splittable, allowing the
	 * framework to create a parallel data source that returns the elements in the iterator.
	 * <p>
	 * Because the iterator will remain unmodified until the actual execution happens, the type of data
	 * returned by the iterator must be given explicitly in the form of the type class (this is due to the
	 * fact that the Java compiler erases the generic type information).
	 * 
	 * @param iterator The iterator that produces the elements of the data set.
	 * @param type The class of the data produced by the iterator. Must not be a generic class.
	 * @return A DataSet representing the elements in the iterator.
	 * 
	 * @see #fromParallelCollection(SplittableIterator, TypeInformation)
	 */
	public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, Class<X> type) {
		return fromParallelCollection(iterator, TypeExtractor.getForClass(type));
	}
	
	/**
	 * Creates a new data set that contains elements in the iterator. The iterator is splittable, allowing the
	 * framework to create a parallel data source that returns the elements in the iterator.
	 * <p>
	 * Because the iterator will remain unmodified until the actual execution happens, the type of data
	 * returned by the iterator must be given explicitly in the form of the type information.
	 * This method is useful for cases where the type is generic. In that case, the type class
	 * (as given in {@link #fromParallelCollection(SplittableIterator, Class)} does not supply all type information.
	 * 
	 * @param iterator The iterator that produces the elements of the data set.
	 * @param type The TypeInformation for the produced data set.
	 * @return A DataSet representing the elements in the iterator.
	 * 
	 * @see #fromParallelCollection(SplittableIterator, Class)
	 */
	public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, TypeInformation<X> type) {
		return fromParallelCollection(iterator, type, Utils.getCallLocationName());
	}
	
	// private helper for passing different call location names
	private <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, TypeInformation<X> type, String callLocationName) {
		return new DataSource<>(this, new ParallelIteratorInputFormat<>(iterator), type, callLocationName);
	}
	
	/**
	 * Creates a new data set that contains a sequence of numbers. The data set will be created in parallel,
	 * so there is no guarantee about the order of the elements.
	 * 
	 * @param from The number to start at (inclusive).
	 * @param to The number to stop at (inclusive).
	 * @return A DataSet, containing all number in the {@code [from, to]} interval.
	 */
	public DataSource<Long> generateSequence(long from, long to) {
		return fromParallelCollection(new NumberSequenceIterator(from, to), BasicTypeInfo.LONG_TYPE_INFO, Utils.getCallLocationName());
	}	
	
	// --------------------------------------------------------------------------------------------
	//  Executing
	// --------------------------------------------------------------------------------------------
	
	/**
	 * Triggers the program execution. The environment will execute all parts of the program that have
	 * resulted in a "sink" operation. Sink operations are for example printing results ({@link DataSet#print()},
	 * writing results (e.g. {@link DataSet#writeAsText(String)},
	 * {@link DataSet#write(org.apache.flink.api.common.io.FileOutputFormat, String)}, or other generic
	 * data sinks created with {@link DataSet#output(org.apache.flink.api.common.io.OutputFormat)}.
	 * <p>
	 * The program execution will be logged and displayed with a generated default name.
	 * 
	 * @return The result of the job execution, containing elapsed time and accumulators.
	 * @throws Exception Thrown, if the program executions fails.
	 */
	public JobExecutionResult execute() throws Exception {
		return execute(getDefaultName());
	}
	
	/**
	 * Triggers the program execution. The environment will execute all parts of the program that have
	 * resulted in a "sink" operation. Sink operations are for example printing results ({@link DataSet#print()},
	 * writing results (e.g. {@link DataSet#writeAsText(String)},
	 * {@link DataSet#write(org.apache.flink.api.common.io.FileOutputFormat, String)}, or other generic
	 * data sinks created with {@link DataSet#output(org.apache.flink.api.common.io.OutputFormat)}.
	 * <p>
	 * The program execution will be logged and displayed with the given job name.
	 * 
	 * @return The result of the job execution, containing elapsed time and accumulators.
	 * @throws Exception Thrown, if the program executions fails.
	 */
	public abstract JobExecutionResult execute(String jobName) throws Exception;

	/**
	 * Creates the plan with which the system will execute the program, and returns it as 
	 * a String using a JSON representation of the execution data flow graph.
	 * Note that this needs to be called, before the plan is executed.
	 * 
	 * @return The execution plan of the program, as a JSON String.
	 * @throws Exception Thrown, if the compiler could not be instantiated, or the master could not
	 *                   be contacted to retrieve information relevant to the execution planning.
	 */
	public abstract String getExecutionPlan() throws Exception;
	
	/**
	 * Registers a file at the distributed cache under the given name. The file will be accessible
	 * from any user-defined function in the (distributed) runtime under a local path. Files
	 * may be local files (as long as all relevant workers have access to it), or files in a distributed file system.
	 * The runtime will copy the files temporarily to a local cache, if needed.
	 * <p>
	 * The {@link org.apache.flink.api.common.functions.RuntimeContext} can be obtained inside UDFs via
	 * {@link org.apache.flink.api.common.functions.RichFunction#getRuntimeContext()} and provides access
	 * {@link org.apache.flink.api.common.cache.DistributedCache} via 
	 * {@link org.apache.flink.api.common.functions.RuntimeContext#getDistributedCache()}.
	 * 
	 * @param filePath The path of the file, as a URI (e.g. "file:///some/path" or "hdfs://host:port/and/path")
	 * @param name The name under which the file is registered.
	 */
	public void registerCachedFile(String filePath, String name){
		registerCachedFile(filePath, name, false);
	}
	
	/**
	 * Registers a file at the distributed cache under the given name. The file will be accessible
	 * from any user-defined function in the (distributed) runtime under a local path. Files
	 * may be local files (as long as all relevant workers have access to it), or files in a distributed file system. 
	 * The runtime will copy the files temporarily to a local cache, if needed.
	 * <p>
	 * The {@link org.apache.flink.api.common.functions.RuntimeContext} can be obtained inside UDFs via
	 * {@link org.apache.flink.api.common.functions.RichFunction#getRuntimeContext()} and provides access
	 * {@link org.apache.flink.api.common.cache.DistributedCache} via 
	 * {@link org.apache.flink.api.common.functions.RuntimeContext#getDistributedCache()}.
	 * 
	 * @param filePath The path of the file, as a URI (e.g. "file:///some/path" or "hdfs://host:port/and/path")
	 * @param name The name under which the file is registered.
	 * @param executable flag indicating whether the file should be executable
	 */
	public void registerCachedFile(String filePath, String name, boolean executable){
		this.cacheFile.add(new Tuple2<>(name, new DistributedCacheEntry(filePath, executable)));
	}
	
	/**
	 * Registers all files that were registered at this execution environment's cache registry with the
	 * given plan's cache registry.
	 * 
	 * @param p The plan to register files at.
	 * @throws IOException Thrown if checks for existence and sanity fail.
	 */
	protected void registerCachedFilesWithPlan(Plan p) throws IOException {
		for (Tuple2<String, DistributedCacheEntry> entry : cacheFile) {
			p.registerCachedFile(entry.f0, entry.f1);
		}
	}
	
	/**
	 * Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
	 * and operations and how they interact, as an isolated unit that can be executed with a
	 * {@link org.apache.flink.api.common.PlanExecutor}. Obtaining a plan and starting it with an
	 * executor is an alternative way to run a program and is only possible if the program consists
	 * only of distributed operations.
	 * This automatically starts a new stage of execution.
	 * 
	 * @return The program's plan.
	 */
	@Internal
	public Plan createProgramPlan() {
		return createProgramPlan(null);
	}
	
	/**
	 * Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
	 * and operations and how they interact, as an isolated unit that can be executed with a
	 * {@link org.apache.flink.api.common.PlanExecutor}. Obtaining a plan and starting it with an
	 * executor is an alternative way to run a program and is only possible if the program consists
	 * only of distributed operations.
	 * This automatically starts a new stage of execution.
	 * 
	 * @param jobName The name attached to the plan (displayed in logs and monitoring).
	 * @return The program's plan.
	 */
	@Internal
	public Plan createProgramPlan(String jobName) {
		return createProgramPlan(jobName, true);
	}

	/**
	 * Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
	 * and operations and how they interact, as an isolated unit that can be executed with a
	 * {@link org.apache.flink.api.common.PlanExecutor}. Obtaining a plan and starting it with an
	 * executor is an alternative way to run a program and is only possible if the program consists
	 * only of distributed operations.
	 *
	 * @param jobName The name attached to the plan (displayed in logs and monitoring).
	 * @param clearSinks Whether or not to start a new stage of execution.
	 * @return The program's plan.
	 */
	@Internal
	public Plan createProgramPlan(String jobName, boolean clearSinks) {
		if (this.sinks.isEmpty()) {
			if (wasExecuted) {
				throw new RuntimeException("No new data sinks have been defined since the " +
						"last execution. The last execution refers to the latest call to " +
						"'execute()', 'count()', 'collect()', or 'print()'.");
			} else {
				throw new RuntimeException("No data sinks have been created yet. " +
						"A program needs at least one sink that consumes data. " +
						"Examples are writing the data set or printing it.");
			}
		}
		
		if (jobName == null) {
			jobName = getDefaultName();
		}
		
		OperatorTranslation translator = new OperatorTranslation();
		Plan plan = translator.translateToPlan(this.sinks, jobName);

		if (getParallelism() > 0) {
			plan.setDefaultParallelism(getParallelism());
		}
		plan.setExecutionConfig(getConfig());
		
		// Check plan for GenericTypeInfo's and register the types at the serializers.
		if (!config.isAutoTypeRegistrationDisabled()) {
			plan.accept(new Visitor<org.apache.flink.api.common.operators.Operator<?>>() {

				private final HashSet<Class<?>> deduplicator = new HashSet<>();

				@Override
				public boolean preVisit(org.apache.flink.api.common.operators.Operator<?> visitable) {
					OperatorInformation<?> opInfo = visitable.getOperatorInfo();
					Serializers.recursivelyRegisterType(opInfo.getOutputType(), config, deduplicator);
					return true;
				}
				@Override
				public void postVisit(org.apache.flink.api.common.operators.Operator<?> visitable) {}
			});
		}

		try {
			registerCachedFilesWithPlan(plan);
		} catch (Exception e) {
			throw new RuntimeException("Error while registering cached files: " + e.getMessage(), e);
		}
		
		// clear all the sinks such that the next execution does not redo everything
		if (clearSinks) {
			this.sinks.clear();
			wasExecuted = true;
		}

		// All types are registered now. Print information.
		int registeredTypes = config.getRegisteredKryoTypes().size() +
				config.getRegisteredPojoTypes().size() +
				config.getRegisteredTypesWithKryoSerializerClasses().size() +
				config.getRegisteredTypesWithKryoSerializers().size();
		int defaultKryoSerializers = config.getDefaultKryoSerializers().size() +
				config.getDefaultKryoSerializerClasses().size();
		LOG.info("The job has {} registered types and {} default Kryo serializers", registeredTypes, defaultKryoSerializers);

		if(config.isForceKryoEnabled() && config.isForceAvroEnabled()) {
			LOG.warn("In the ExecutionConfig, both Avro and Kryo are enforced. Using Kryo serializer");
		}
		if(config.isForceKryoEnabled()) {
			LOG.info("Using KryoSerializer for serializing POJOs");
		}
		if(config.isForceAvroEnabled()) {
			LOG.info("Using AvroSerializer for serializing POJOs");
		}

		if(LOG.isDebugEnabled()) {
			LOG.debug("Registered Kryo types: {}", config.getRegisteredKryoTypes().toString());
			LOG.debug("Registered Kryo with Serializers types: {}", config.getRegisteredTypesWithKryoSerializers().entrySet().toString());
			LOG.debug("Registered Kryo with Serializer Classes types: {}", config.getRegisteredTypesWithKryoSerializerClasses().entrySet().toString());
			LOG.debug("Registered Kryo default Serializers: {}", config.getDefaultKryoSerializers().entrySet().toString());
			LOG.debug("Registered Kryo default Serializers Classes {}", config.getDefaultKryoSerializerClasses().entrySet().toString());
			LOG.debug("Registered POJO types: {}", config.getRegisteredPojoTypes().toString());

			// print information about static code analysis
			LOG.debug("Static code analysis mode: {}", config.getCodeAnalysisMode());
		}

		return plan;
	}
	
	/**
	 * Adds the given sink to this environment. Only sinks that have been added will be executed once
	 * the {@link #execute()} or {@link #execute(String)} method is called.
	 * 
	 * @param sink The sink to add for execution.
	 */
	@Internal
	void registerDataSink(DataSink<?> sink) {
		this.sinks.add(sink);
	}
	
	/**
	 * Gets a default job name, based on the timestamp when this method is invoked.
	 * 
	 * @return A default job name.
	 */
	private static String getDefaultName() {
		return "Flink Java Job at " + Calendar.getInstance().getTime();
	}
	
	// --------------------------------------------------------------------------------------------
	//  Instantiation of Execution Contexts
	// --------------------------------------------------------------------------------------------

	/**
	 * Creates an execution environment that represents the context in which the program is currently executed.
	 * If the program is invoked standalone, this method returns a local execution environment, as returned by
	 * {@link #createLocalEnvironment()}. If the program is invoked from within the command line client to be
	 * submitted to a cluster, this method returns the execution environment of this cluster.
	 * 
	 * @return The execution environment of the context in which the program is executed.
	 */
	public static ExecutionEnvironment getExecutionEnvironment() {
		return contextEnvironmentFactory == null ? 
				createLocalEnvironment() : contextEnvironmentFactory.createExecutionEnvironment();
	}

	/**
	 * Creates a {@link CollectionEnvironment} that uses Java Collections underneath. This will execute in a
	 * single thread in the current JVM. It is very fast but will fail if the data does not fit into
	 * memory. The parallelism will always be 1. This is useful during implementation and for debugging.
	 * @return A Collection Environment
	 */
	@PublicEvolving
	public static CollectionEnvironment createCollectionsEnvironment(){
		CollectionEnvironment ce = new CollectionEnvironment();
		ce.setParallelism(1);
		return ce;
	}

	/**
	 * Creates a {@link LocalEnvironment}. The local execution environment will run the program in a
	 * multi-threaded fashion in the same JVM as the environment was created in. The default
	 * parallelism of the local environment is the number of hardware contexts (CPU cores / threads),
	 * unless it was specified differently by {@link #setDefaultLocalParallelism(int)}.
	 * 
	 * @return A local execution environment.
	 */
	public static LocalEnvironment createLocalEnvironment() {
		return createLocalEnvironment(defaultLocalDop);
	}
	
	/**
	 * Creates a {@link LocalEnvironment}. The local execution environment will run the program in a
	 * multi-threaded fashion in the same JVM as the environment was created in. It will use the
	 * parallelism specified in the parameter.
	 * 
	 * @param parallelism The parallelism for the local environment.
	 * @return A local execution environment with the specified parallelism.
	 */
	public static LocalEnvironment createLocalEnvironment(int parallelism) {
		LocalEnvironment lee = new LocalEnvironment();
		lee.setParallelism(parallelism);
		return lee;
	}

	/**
	 * Creates a {@link LocalEnvironment}. The local execution environment will run the program in a
	 * multi-threaded fashion in the same JVM as the environment was created in. It will use the
	 * parallelism specified in the parameter.
	 *
	 * @param customConfiguration Pass a custom configuration to the LocalEnvironment.
	 * @return A local execution environment with the specified parallelism.
	 */
	public static LocalEnvironment createLocalEnvironment(Configuration customConfiguration) {
		return new LocalEnvironment(customConfiguration);
	}
	
	/**
	 * Creates a {@link RemoteEnvironment}. The remote environment sends (parts of) the program 
	 * to a cluster for execution. Note that all file paths used in the program must be accessible from the
	 * cluster. The execution will use the cluster's default parallelism, unless the parallelism is
	 * set explicitly via {@link ExecutionEnvironment#setParallelism(int)}.
	 * 
	 * @param host The host name or address of the master (JobManager), where the program should be executed.
	 * @param port The port of the master (JobManager), where the program should be executed. 
	 * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the program uses
	 *                 user-defined functions, user-defined input formats, or any libraries, those must be
	 *                 provided in the JAR files.
	 * @return A remote environment that executes the program on a cluster.
	 */
	public static ExecutionEnvironment createRemoteEnvironment(String host, int port, String... jarFiles) {
		return new RemoteEnvironment(host, port, jarFiles);
	}

	/**
	 * Creates a {@link RemoteEnvironment}. The remote environment sends (parts of) the program
	 * to a cluster for execution. Note that all file paths used in the program must be accessible from the
	 * cluster. The custom configuration file is used to configure Akka specific configuration parameters
	 * for the Client only; Program parallelism can be set via {@link ExecutionEnvironment#setParallelism(int)}.
	 *
	 * Cluster configuration has to be done in the remotely running Flink instance.
	 *
	 * @param host The host name or address of the master (JobManager), where the program should be executed.
	 * @param port The port of the master (JobManager), where the program should be executed.
	 * @param clientConfiguration Configuration used by the client that connects to the cluster.
	 * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the program uses
	 *                 user-defined functions, user-defined input formats, or any libraries, those must be
	 *                 provided in the JAR files.
	 * @return A remote environment that executes the program on a cluster.
	 */
	public static ExecutionEnvironment createRemoteEnvironment(
			String host, int port, Configuration clientConfiguration, String... jarFiles) {
		return new RemoteEnvironment(host, port, clientConfiguration, jarFiles, null);
	}

	/**
	 * Creates a {@link RemoteEnvironment}. The remote environment sends (parts of) the program 
	 * to a cluster for execution. Note that all file paths used in the program must be accessible from the
	 * cluster. The execution will use the specified parallelism.
	 * 
	 * @param host The host name or address of the master (JobManager), where the program should be executed.
	 * @param port The port of the master (JobManager), where the program should be executed. 
	 * @param parallelism The parallelism to use during the execution.
	 * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the program uses
	 *                 user-defined functions, user-defined input formats, or any libraries, those must be
	 *                 provided in the JAR files.
	 * @return A remote environment that executes the program on a cluster.
	 */
	public static ExecutionEnvironment createRemoteEnvironment(String host, int port, int parallelism, String... jarFiles) {
		RemoteEnvironment rec = new RemoteEnvironment(host, port, jarFiles);
		rec.setParallelism(parallelism);
		return rec;
	}
	
	/**
	 * Sets the default parallelism that will be used for the local execution environment created by
	 * {@link #createLocalEnvironment()}.
	 * 
	 * @param parallelism The parallelism to use as the default local parallelism.
	 */
	public static void setDefaultLocalParallelism(int parallelism) {
		defaultLocalDop = parallelism;
	}
	
	// --------------------------------------------------------------------------------------------
	//  Methods to control the context environment and creation of explicit environments other
	//  than the context environment
	// --------------------------------------------------------------------------------------------

	/**
	 * Sets a context environment factory that creates the context environment for running programs
	 * with pre-configured environments. Examples are running programs from the command line, and
	 * running programs in the Scala shell.
	 * 
	 * <p>When the context environment factory is set, no other environments can be explicitly used.
	 * 
	 * @param ctx The context environment factory.
	 */
	protected static void initializeContextEnvironment(ExecutionEnvironmentFactory ctx) {
		contextEnvironmentFactory = Preconditions.checkNotNull(ctx);
	}

	/**
	 * Un-sets the context environment factory. After this method is called, the call to
	 * {@link #getExecutionEnvironment()} will again return a default local execution environment, and
	 * it is possible to explicitly instantiate the LocalEnvironment and the RemoteEnvironment.
	 */
	protected static void resetContextEnvironment() {
		contextEnvironmentFactory = null;
	}

	/**
	 * Checks whether it is currently permitted to explicitly instantiate a LocalEnvironment
	 * or a RemoteEnvironment.
	 * 
	 * @return True, if it is possible to explicitly instantiate a LocalEnvironment or a
	 *         RemoteEnvironment, false otherwise.
	 */
	@Internal
	public static boolean areExplicitEnvironmentsAllowed() {
		return contextEnvironmentFactory == null;
	}
}
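
The factory methods above are easiest to see in a small driver. Below is a minimal sketch of explicitly targeting a remote JobManager from an IDE; the host name, port and JAR path are placeholders, not values taken from this write-up:

import org.apache.flink.api.java.ExecutionEnvironment;

public class RemoteEnvSketch {
	public static void main(String[] args) throws Exception {
		// Explicit environments are only allowed when no context factory is set
		// (see areExplicitEnvironmentsAllowed above).
		ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
				"jobmanager-host", 6123, "/path/to/user-code.jar");   // placeholder host/port/jar
		env.setParallelism(4);

		// count() triggers execution on the remote cluster and ships the JAR to it.
		long n = env.fromElements(1, 2, 3).count();
		System.out.println("count = " + n);
	}
}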



/**
	 * Triggers the program execution. The environment will execute all parts of
	 * the program that have resulted in a "sink" operation. Sink operations are
	 * for example printing results or forwarding them to a message queue.
	 * <p>
	 * The program execution will be logged and displayed with a generated
	 * default name.
	 *
	 * @return The result of the job execution, containing elapsed time and accumulators.
	 * @throws Exception which occurs during job execution.
	 */
	public JobExecutionResult execute() throws Exception {
		return execute(DEFAULT_JOB_NAME);
	}

/**
	 * Triggers the program execution. The environment will execute all parts of
	 * the program that have resulted in a "sink" operation. Sink operations are
	 * for example printing results or forwarding them to a message queue.
	 * <p>
	 * The program execution will be logged and displayed with the provided name.
	 *
	 * @param jobName
	 * 		Desired name of the job
	 * @return The result of the job execution, containing elapsed time and accumulators.
	 * @throws Exception which occurs during job execution.
	 */
	public abstract JobExecutionResult execute(String jobName) throws Exception;
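
As a reference point for the code that follows, here is a minimal streaming driver sketch; the socket source, host and port are illustrative assumptions. Every API call before execute() only records a StreamTransformation in the environment; execute(jobName) is the point where the recorded transformations are turned into a graph and submitted.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class ExecuteSketch {
	public static void main(String[] args) throws Exception {
		// Local or context environment, depending on how the program is started.
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		env.socketTextStream("localhost", 9999)            // assumed source, for illustration only
			.map(new MapFunction<String, String>() {
				@Override
				public String map(String value) {
					return value.toUpperCase();
				}
			})
			.print();                                       // sink operation

		// Builds the StreamGraph/JobGraph from the recorded transformations and submits it.
		env.execute("execute-sketch");
	}
}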


package org.apache.flink.streaming.api.environment;

import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster;

import org.apache.flink.streaming.api.graph.StreamGraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The LocalStreamEnvironment is a StreamExecutionEnvironment that runs the program locally,
 * multi-threaded, in the JVM where the environment is instantiated. It spawns an embedded
 * Flink cluster in the background and executes the program on that cluster.
 *
 * <p>When this environment is instantiated, it uses a default parallelism of {@code 1}. The default
 * parallelism can be set via {@link #setParallelism(int)}.
 *
 * <p>Local environments can also be instantiated through {@link StreamExecutionEnvironment#createLocalEnvironment()}
 * and {@link StreamExecutionEnvironment#createLocalEnvironment(int)}. The former version will pick a
 * default parallelism equal to the number of hardware contexts in the local machine.
 */
@Public
public class LocalStreamEnvironment extends StreamExecutionEnvironment {

	private static final Logger LOG = LoggerFactory.getLogger(LocalStreamEnvironment.class);
	
	/** The configuration to use for the local cluster */
	private final Configuration conf;

	/**
	 * Creates a new local stream environment that uses the default configuration.
	 */
	public LocalStreamEnvironment() {
		this(null);
	}

	/**
	 * Creates a new local stream environment that configures its local executor with the given configuration.
	 *
	 * @param config The configuration used to configure the local executor.
	 */
	public LocalStreamEnvironment(Configuration config) {
		if (!ExecutionEnvironment.areExplicitEnvironmentsAllowed()) {
			throw new InvalidProgramException(
					"The LocalStreamEnvironment cannot be used when submitting a program through a client, " +
							"or running in a TestEnvironment context.");
		}
		
		this.conf = config == null ? new Configuration() : config;
	}

	/**
	 * Executes the JobGraph on a local embedded mini cluster, using a
	 * user-specified job name.
	 * 
	 * @param jobName
	 *            name of the job
	 * @return The result of the job execution, containing elapsed time and accumulators.
	 */
	@Override
	public JobExecutionResult execute(String jobName) throws Exception {
		// transform the streaming program into a JobGraph
		StreamGraph streamGraph = getStreamGraph();
		streamGraph.setJobName(jobName);

		JobGraph jobGraph = streamGraph.getJobGraph();

		Configuration configuration = new Configuration();
		configuration.addAll(jobGraph.getJobConfiguration());

		configuration.setLong(ConfigConstants.TASK_MANAGER_MEMORY_SIZE_KEY, -1L);
		configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, jobGraph.getMaximumParallelism());
		
		// add (and override) the settings with what the user defined
		configuration.addAll(this.conf);
		
		if (LOG.isInfoEnabled()) {
			LOG.info("Running job on local embedded Flink mini cluster");
		}

		LocalFlinkMiniCluster exec = new LocalFlinkMiniCluster(configuration, true);
		try {
			exec.start();
			return exec.submitJobAndWait(jobGraph, getConfig().isSysoutLoggingEnabled());
		}
		finally {
			transformations.clear();
			exec.stop();
		}
	}
}
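
A short usage sketch for the local path shown above (the parallelism, elements, and job name are arbitrary choices): execute(jobName) builds the StreamGraph, derives the JobGraph from it, and submits that to an embedded LocalFlinkMiniCluster.

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LocalRunSketch {
	public static void main(String[] args) throws Exception {
		// Explicitly request an embedded mini cluster; this is rejected when the program
		// is submitted through a client (see the constructor check above).
		StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(2);

		env.fromElements("a", "b", "c").print();

		// StreamGraph -> JobGraph -> LocalFlinkMiniCluster.submitJobAndWait(...)
		env.execute("local-run-sketch");
	}
}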


package org.apache.flink.streaming.api.graph;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.io.InputFormat;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.InputTypeConfigurable;
import org.apache.flink.api.java.typeutils.MissingTypeInfo;
import org.apache.flink.optimizer.plan.StreamingPlan;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.streaming.api.collector.selector.OutputSelector;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.operators.OutputTypeConfigurable;
import org.apache.flink.streaming.api.operators.StoppableStreamSource;
import org.apache.flink.streaming.api.operators.StreamOperator;
import org.apache.flink.streaming.api.operators.StreamSource;
import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
import org.apache.flink.runtime.state.AbstractStateBackend;
import org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner;
import org.apache.flink.streaming.runtime.partitioner.RebalancePartitioner;
import org.apache.flink.streaming.runtime.partitioner.StreamPartitioner;
import org.apache.flink.streaming.runtime.tasks.OneInputStreamTask;
import org.apache.flink.streaming.runtime.tasks.SourceStreamTask;
import org.apache.flink.streaming.runtime.tasks.StoppableSourceStreamTask;
import org.apache.flink.streaming.runtime.tasks.StreamIterationHead;
import org.apache.flink.streaming.runtime.tasks.StreamIterationTail;
import org.apache.flink.streaming.runtime.tasks.TwoInputStreamTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class representing the streaming topology. It contains all the information
 * necessary to build the JobGraph for the execution.
 */
@Internal
public class StreamGraph extends StreamingPlan {
	
	private static final Logger LOG = LoggerFactory.getLogger(StreamGraph.class);

	private String jobName = StreamExecutionEnvironment.DEFAULT_JOB_NAME;

	private final StreamExecutionEnvironment environment;
	private final ExecutionConfig executionConfig;
	private final CheckpointConfig checkpointConfig;
	
	private boolean chaining;

	private Map<Integer, StreamNode> streamNodes;
	private Set<Integer> sources;
	private Set<Integer> sinks;
	private Map<Integer, Tuple2<Integer, List<String>>> virtualSelectNodes;
	private Map<Integer, Tuple2<Integer, StreamPartitioner<?>>> virtuaPartitionNodes;

	protected Map<Integer, String> vertexIDtoBrokerID;
	protected Map<Integer, Long> vertexIDtoLoopTimeout;
	private AbstractStateBackend stateBackend;
	private Set<Tuple2<StreamNode, StreamNode>> iterationSourceSinkPairs;


	public StreamGraph(StreamExecutionEnvironment environment) {
		this.environment = environment;
		this.executionConfig = environment.getConfig();
		this.checkpointConfig = environment.getCheckpointConfig();

		// create an empty new stream graph.
		clear();
	}

	/**
	 * Remove all registered nodes etc.
	 */
	public void clear() {
		streamNodes = new HashMap<>();
		virtualSelectNodes = new HashMap<>();
		virtuaPartitionNodes = new HashMap<>();
		vertexIDtoBrokerID = new HashMap<>();
		vertexIDtoLoopTimeout  = new HashMap<>();
		iterationSourceSinkPairs = new HashSet<>();
		sources = new HashSet<>();
		sinks = new HashSet<>();
	}
	
	
	public StreamExecutionEnvironment getEnvironment() {
		return environment;
	}

	public ExecutionConfig getExecutionConfig() {
		return executionConfig;
	}
	
	public CheckpointConfig getCheckpointConfig() {
		return checkpointConfig;
	}

	public String getJobName() {
		return jobName;
	}

	public void setJobName(String jobName) {
		this.jobName = jobName;
	}

	public void setChaining(boolean chaining) {
		this.chaining = chaining;
	}

	public void setStateBackend(AbstractStateBackend backend) {
		this.stateBackend = backend;
	}

	public AbstractStateBackend getStateBackend() {
		return this.stateBackend;
	}

	// Checkpointing
	
	public boolean isChainingEnabled() {
		return chaining;
	}
	

	public boolean isIterative() {
		return !vertexIDtoLoopTimeout.isEmpty();
	}

	public <IN, OUT> void addSource(Integer vertexID,
		String slotSharingGroup,
		StreamOperator<OUT> operatorObject,
		TypeInformation<IN> inTypeInfo,
		TypeInformation<OUT> outTypeInfo,
		String operatorName) {
		addOperator(vertexID, slotSharingGroup, operatorObject, inTypeInfo, outTypeInfo, operatorName);
		sources.add(vertexID);
	}

	public <IN, OUT> void addSink(Integer vertexID,
		String slotSharingGroup,
		StreamOperator<OUT> operatorObject,
		TypeInformation<IN> inTypeInfo,
		TypeInformation<OUT> outTypeInfo,
		String operatorName) {
		addOperator(vertexID, slotSharingGroup, operatorObject, inTypeInfo, outTypeInfo, operatorName);
		sinks.add(vertexID);
	}

	public <IN, OUT> void addOperator(
			Integer vertexID,
			String slotSharingGroup,
			StreamOperator<OUT> operatorObject,
			TypeInformation<IN> inTypeInfo,
			TypeInformation<OUT> outTypeInfo,
			String operatorName) {

		if (operatorObject instanceof StoppableStreamSource) {
			addNode(vertexID, slotSharingGroup, StoppableSourceStreamTask.class, operatorObject, operatorName);
		} else if (operatorObject instanceof StreamSource) {
			addNode(vertexID, slotSharingGroup, SourceStreamTask.class, operatorObject, operatorName);
		} else {
			addNode(vertexID, slotSharingGroup, OneInputStreamTask.class, operatorObject, operatorName);
		}

		TypeSerializer<IN> inSerializer = inTypeInfo != null && !(inTypeInfo instanceof MissingTypeInfo) ? inTypeInfo.createSerializer(executionConfig) : null;

		TypeSerializer<OUT> outSerializer = outTypeInfo != null && !(outTypeInfo instanceof MissingTypeInfo) ? outTypeInfo.createSerializer(executionConfig) : null;

		setSerializers(vertexID, inSerializer, null, outSerializer);

		if (operatorObject instanceof OutputTypeConfigurable) {
			@SuppressWarnings("unchecked")
			OutputTypeConfigurable<OUT> outputTypeConfigurable = (OutputTypeConfigurable<OUT>) operatorObject;
			// sets the output type which must be known at StreamGraph creation time
			outputTypeConfigurable.setOutputType(outTypeInfo, executionConfig);
		}

		if (operatorObject instanceof InputTypeConfigurable) {
			InputTypeConfigurable inputTypeConfigurable = (InputTypeConfigurable) operatorObject;
			inputTypeConfigurable.setInputType(inTypeInfo, executionConfig);
		}

		if (LOG.isDebugEnabled()) {
			LOG.debug("Vertex: {}", vertexID);
		}
	}

	public <IN1, IN2, OUT> void addCoOperator(
			Integer vertexID,
			String slotSharingGroup,
			TwoInputStreamOperator<IN1, IN2, OUT> taskOperatorObject,
			TypeInformation<IN1> in1TypeInfo,
			TypeInformation<IN2> in2TypeInfo,
			TypeInformation<OUT> outTypeInfo,
			String operatorName) {

		addNode(vertexID, slotSharingGroup, TwoInputStreamTask.class, taskOperatorObject, operatorName);

		TypeSerializer<OUT> outSerializer = (outTypeInfo != null) && !(outTypeInfo instanceof MissingTypeInfo) ?
				outTypeInfo.createSerializer(executionConfig) : null;

		setSerializers(vertexID, in1TypeInfo.createSerializer(executionConfig), in2TypeInfo.createSerializer(executionConfig), outSerializer);

		if (taskOperatorObject instanceof OutputTypeConfigurable) {
			@SuppressWarnings("unchecked")
			OutputTypeConfigurable<OUT> outputTypeConfigurable = (OutputTypeConfigurable<OUT>) taskOperatorObject;
			// sets the output type which must be known at StreamGraph creation time
			outputTypeConfigurable.setOutputType(outTypeInfo, executionConfig);
		}

		if (LOG.isDebugEnabled()) {
			LOG.debug("CO-TASK: {}", vertexID);
		}
	}

	protected StreamNode addNode(Integer vertexID,
		String slotSharingGroup,
		Class<? extends AbstractInvokable> vertexClass,
		StreamOperator<?> operatorObject,
		String operatorName) {

		if (streamNodes.containsKey(vertexID)) {
			throw new RuntimeException("Duplicate vertexID " + vertexID);
		}

		StreamNode vertex = new StreamNode(environment,
			vertexID,
			slotSharingGroup,
			operatorObject,
			operatorName,
			new ArrayList<OutputSelector<?>>(),
			vertexClass);

		streamNodes.put(vertexID, vertex);

		return vertex;
	}

	/**
	 * Adds a new virtual node that is used to connect a downstream vertex to only the outputs
	 * with the selected names.
	 *
	 * When adding an edge from the virtual node to a downstream node the connection will be made
	 * to the original node, only with the selected names given here.
	 *
	 * @param originalId ID of the node that should be connected to.
	 * @param virtualId ID of the virtual node.
	 * @param selectedNames The selected names.
	 */
	public void addVirtualSelectNode(Integer originalId, Integer virtualId, List<String> selectedNames) {

		if (virtualSelectNodes.containsKey(virtualId)) {
			throw new IllegalStateException("Already has virtual select node with id " + virtualId);
		}

		virtualSelectNodes.put(virtualId,
				new Tuple2<Integer, List<String>>(originalId, selectedNames));
	}

	/**
	 * Adds a new virtual node that is used to connect a downstream vertex to an input with a certain
	 * partitioning.
	 *
	 * When adding an edge from the virtual node to a downstream node the connection will be made
	 * to the original node, but with the partitioning given here.
	 *
	 * @param originalId ID of the node that should be connected to.
	 * @param virtualId ID of the virtual node.
	 * @param partitioner The partitioner
	 */
	public void addVirtualPartitionNode(Integer originalId, Integer virtualId, StreamPartitioner<?> partitioner) {

		if (virtuaPartitionNodes.containsKey(virtualId)) {
			throw new IllegalStateException("Already has virtual partition node with id " + virtualId);
		}

		virtuaPartitionNodes.put(virtualId,
				new Tuple2<Integer, StreamPartitioner<?>>(originalId, partitioner));
	}

	/**
	 * Determines the slot sharing group of an operation across virtual nodes.
	 */
	public String getSlotSharingGroup(Integer id) {
		if (virtualSelectNodes.containsKey(id)) {
			Integer mappedId = virtualSelectNodes.get(id).f0;
			return getSlotSharingGroup(mappedId);
		} else if (virtuaPartitionNodes.containsKey(id)) {
			Integer mappedId = virtuaPartitionNodes.get(id).f0;
			return getSlotSharingGroup(mappedId);
		} else {
			StreamNode node = getStreamNode(id);
			return node.getSlotSharingGroup();
		}
	}

	public void addEdge(Integer upStreamVertexID, Integer downStreamVertexID, int typeNumber) {
		addEdgeInternal(upStreamVertexID,
				downStreamVertexID,
				typeNumber,
				null,
				new ArrayList<String>());

	}

	private void addEdgeInternal(Integer upStreamVertexID,
			Integer downStreamVertexID,
			int typeNumber,
			StreamPartitioner<?> partitioner,
			List<String> outputNames) {


		if (virtualSelectNodes.containsKey(upStreamVertexID)) {
			int virtualId = upStreamVertexID;
			upStreamVertexID = virtualSelectNodes.get(virtualId).f0;
			if (outputNames.isEmpty()) {
				// selections that happen downstream override earlier selections
				outputNames = virtualSelectNodes.get(virtualId).f1;
			}
			addEdgeInternal(upStreamVertexID, downStreamVertexID, typeNumber, partitioner, outputNames);
		} else if (virtuaPartitionNodes.containsKey(upStreamVertexID)) {
			int virtualId = upStreamVertexID;
			upStreamVertexID = virtuaPartitionNodes.get(virtualId).f0;
			if (partitioner == null) {
				partitioner = virtuaPartitionNodes.get(virtualId).f1;
			}
			addEdgeInternal(upStreamVertexID, downStreamVertexID, typeNumber, partitioner, outputNames);
		} else {
			StreamNode upstreamNode = getStreamNode(upStreamVertexID);
			StreamNode downstreamNode = getStreamNode(downStreamVertexID);

			// If no partitioner was specified and the parallelism of upstream and downstream
			// operator matches use forward partitioning, use rebalance otherwise.
			if (partitioner == null && upstreamNode.getParallelism() == downstreamNode.getParallelism()) {
				partitioner = new ForwardPartitioner<Object>();
			} else if (partitioner == null) {
				partitioner = new RebalancePartitioner<Object>();
			}

			if (partitioner instanceof ForwardPartitioner) {
				if (upstreamNode.getParallelism() != downstreamNode.getParallelism()) {
					throw new UnsupportedOperationException("Forward partitioning does not allow " +
							"change of parallelism. Upstream operation: " + upstreamNode + " parallelism: " + upstreamNode.getParallelism() +
							", downstream operation: " + downstreamNode + " parallelism: " + downstreamNode.getParallelism() +
							" You must use another partitioning strategy, such as broadcast, rebalance, shuffle or global.");
				}
			}

			StreamEdge edge = new StreamEdge(upstreamNode, downstreamNode, typeNumber, outputNames, partitioner);

			getStreamNode(edge.getSourceId()).addOutEdge(edge);
			getStreamNode(edge.getTargetId()).addInEdge(edge);
		}
	}

	public <T> void addOutputSelector(Integer vertexID, OutputSelector<T> outputSelector) {
		if (virtuaPartitionNodes.containsKey(vertexID)) {
			addOutputSelector(virtuaPartitionNodes.get(vertexID).f0, outputSelector);
		} else if (virtualSelectNodes.containsKey(vertexID)) {
			addOutputSelector(virtualSelectNodes.get(vertexID).f0, outputSelector);
		} else {
			getStreamNode(vertexID).addOutputSelector(outputSelector);

			if (LOG.isDebugEnabled()) {
				LOG.debug("Outputselector set for {}", vertexID);
			}
		}

	}

	public void setParallelism(Integer vertexID, int parallelism) {
		if (getStreamNode(vertexID) != null) {
			getStreamNode(vertexID).setParallelism(parallelism);
		}
	}

	public void setOneInputStateKey(Integer vertexID, KeySelector<?, ?> keySelector, TypeSerializer<?> keySerializer) {
		StreamNode node = getStreamNode(vertexID);
		node.setStatePartitioner1(keySelector);
		node.setStateKeySerializer(keySerializer);
	}

	public void setTwoInputStateKey(Integer vertexID, KeySelector<?, ?> keySelector1, KeySelector<?, ?> keySelector2, TypeSerializer<?> keySerializer) {
		StreamNode node = getStreamNode(vertexID);
		node.setStatePartitioner1(keySelector1);
		node.setStatePartitioner2(keySelector2);
		node.setStateKeySerializer(keySerializer);
	}

	public void setBufferTimeout(Integer vertexID, long bufferTimeout) {
		if (getStreamNode(vertexID) != null) {
			getStreamNode(vertexID).setBufferTimeout(bufferTimeout);
		}
	}

	public void setSerializers(Integer vertexID, TypeSerializer<?> in1, TypeSerializer<?> in2, TypeSerializer<?> out) {
		StreamNode vertex = getStreamNode(vertexID);
		vertex.setSerializerIn1(in1);
		vertex.setSerializerIn2(in2);
		vertex.setSerializerOut(out);
	}

	public void setSerializersFrom(Integer from, Integer to) {
		StreamNode fromVertex = getStreamNode(from);
		StreamNode toVertex = getStreamNode(to);

		toVertex.setSerializerIn1(fromVertex.getTypeSerializerOut());
		toVertex.setSerializerOut(fromVertex.getTypeSerializerIn1());
	}

	public <OUT> void setOutType(Integer vertexID, TypeInformation<OUT> outType) {
		getStreamNode(vertexID).setSerializerOut(outType.createSerializer(executionConfig));
	}

	public <IN, OUT> void setOperator(Integer vertexID, StreamOperator<OUT> operatorObject) {
		getStreamNode(vertexID).setOperator(operatorObject);
	}

	public void setInputFormat(Integer vertexID, InputFormat<?, ?> inputFormat) {
		getStreamNode(vertexID).setInputFormat(inputFormat);
	}

	void setTransformationId(Integer nodeId, String transformationId) {
		StreamNode node = streamNodes.get(nodeId);
		if (node != null) {
			node.setTransformationId(transformationId);
		}
	}

	public StreamNode getStreamNode(Integer vertexID) {
		return streamNodes.get(vertexID);
	}

	protected Collection<? extends Integer> getVertexIDs() {
		return streamNodes.keySet();
	}

	public List<StreamEdge> getStreamEdges(int sourceId, int targetId) {

		List<StreamEdge> result = new ArrayList<>();
		for (StreamEdge edge : getStreamNode(sourceId).getOutEdges()) {
			if (edge.getTargetId() == targetId) {
				result.add(edge);
			}
		}

		if (result.isEmpty()) {
			throw new RuntimeException("No such edge in stream graph: " + sourceId + " -> " + targetId);
		}

		return result;
	}

	public Collection<Integer> getSourceIDs() {
		return sources;
	}


	public Collection<Integer> getSinkIDs() {
		return sinks;
	}

	public Collection<StreamNode> getStreamNodes() {
		return streamNodes.values();
	}

	public Set<Tuple2<Integer, StreamOperator<?>>> getOperators() {
		Set<Tuple2<Integer, StreamOperator<?>>> operatorSet = new HashSet<>();
		for (StreamNode vertex : streamNodes.values()) {
			operatorSet.add(new Tuple2<Integer, StreamOperator<?>>(vertex.getId(), vertex
					.getOperator()));
		}
		return operatorSet;
	}

	public String getBrokerID(Integer vertexID) {
		return vertexIDtoBrokerID.get(vertexID);
	}

	public long getLoopTimeout(Integer vertexID) {
		return vertexIDtoLoopTimeout.get(vertexID);
	}

	public Tuple2<StreamNode, StreamNode> createIterationSourceAndSink(int loopId, int sourceId, int sinkId, long timeout, int parallelism) {
		StreamNode source = this.addNode(sourceId,
			null,
			StreamIterationHead.class,
			null,
			"IterationSource-" + loopId);
		sources.add(source.getId());
		setParallelism(source.getId(), parallelism);

		StreamNode sink = this.addNode(sinkId,
			null,
			StreamIterationTail.class,
			null,
			"IterationSink-" + loopId);
		sinks.add(sink.getId());
		setParallelism(sink.getId(), parallelism);

		iterationSourceSinkPairs.add(new Tuple2<>(source, sink));

		this.vertexIDtoBrokerID.put(source.getId(), "broker-" + loopId);
		this.vertexIDtoBrokerID.put(sink.getId(), "broker-" + loopId);
		this.vertexIDtoLoopTimeout.put(source.getId(), timeout);
		this.vertexIDtoLoopTimeout.put(sink.getId(), timeout);

		return new Tuple2<>(source, sink);
	}

	public Set<Tuple2<StreamNode, StreamNode>> getIterationSourceSinkPairs() {
		return iterationSourceSinkPairs;
	}

	private void removeEdge(StreamEdge edge) {
		edge.getSourceVertex().getOutEdges().remove(edge);
		edge.getTargetVertex().getInEdges().remove(edge);
	}

	private void removeVertex(StreamNode toRemove) {
		Set<StreamEdge> edgesToRemove = new HashSet<>();

		edgesToRemove.addAll(toRemove.getInEdges());
		edgesToRemove.addAll(toRemove.getOutEdges());

		for (StreamEdge edge : edgesToRemove) {
			removeEdge(edge);
		}
		streamNodes.remove(toRemove.getId());
	}

	/**
	 * Gets the assembled {@link JobGraph}.
	 */
	@SuppressWarnings("deprecation")
	public JobGraph getJobGraph() {
		// temporarily forbid checkpointing for iterative jobs
		if (isIterative() && checkpointConfig.isCheckpointingEnabled() && !checkpointConfig.isForceCheckpointing()) {
			throw new UnsupportedOperationException(
					"Checkpointing is currently not supported by default for iterative jobs, as we cannot guarantee exactly once semantics. "
							+ "State checkpoints happen normally, but records in-transit during the snapshot will be lost upon failure. "
							+ "\nThe user can force enable state checkpoints with the reduced guarantees by calling: env.enableCheckpointing(interval,true)");
		}

		StreamingJobGraphGenerator jobgraphGenerator = new StreamingJobGraphGenerator(this);

		return jobgraphGenerator.createJobGraph();
	}

	@Override
	public String getStreamingPlanAsJSON() {
		try {
			return new JSONGenerator(this).getJSON();
		}
		catch (Exception e) {
			throw new RuntimeException("JSON plan creation failed", e);
		}
	}

	@Override
	public void dumpStreamingPlanAsJSON(File file) throws IOException {
		PrintWriter pw = null;
		try {
			pw = new PrintWriter(new FileOutputStream(file), false);
			pw.write(getStreamingPlanAsJSON());
			pw.flush();

		} finally {
			if (pw != null) {
				pw.close();
			}
		}
	}

	public static enum ResourceStrategy {
		DEFAULT, ISOLATE, NEWGROUP
	}
}
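
To see a StreamGraph and its JobGraph without running anything, a program can be assembled and then inspected through the methods shown above. This is only a sketch; the tiny pipeline is an arbitrary example:

import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.graph.StreamGraph;

public class PlanInspectionSketch {
	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.fromElements(1, 2, 3).rebalance().print();

		// The logical DAG assembled from the recorded transformations.
		StreamGraph streamGraph = env.getStreamGraph();
		System.out.println(streamGraph.getStreamingPlanAsJSON());

		// The plan that would actually be submitted for execution.
		JobGraph jobGraph = streamGraph.getJobGraph();
		System.out.println("job vertices: " + jobGraph.getNumberOfVertices());
	}
}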


package org.apache.flink.runtime.jobgraph;

import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.blob.BlobClient;
import org.apache.flink.runtime.blob.BlobKey;
import org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings;

import java.io.IOException;
import java.io.Serializable;
import java.net.InetSocketAddress;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
/**
 * The JobGraph represents a Flink dataflow program, at the low level that the JobManager accepts.
 * All programs from higher level APIs are transformed into JobGraphs.
 *
 * <p>The JobGraph is a graph of vertices and intermediate results that are connected together to
 * form a DAG. Note that iterations (feedback edges) are currently not encoded inside the JobGraph
 * but inside certain special vertices that establish the feedback channel amongst themselves.</p>
 *
 * <p>The JobGraph defines the job-wide configuration settings, while each vertex and intermediate result
 * defines the characteristics of the concrete operation and intermediate data.</p>
 */
public class JobGraph implements Serializable {

	private static final long serialVersionUID = 1L;

	// --------------------------------------------------------------------------------------------
	// Members that define the structure / topology of the graph
	// --------------------------------------------------------------------------------------------

	/** List of task vertices included in this job graph. */
	private final Map<JobVertexID, JobVertex> taskVertices = new LinkedHashMap<JobVertexID, JobVertex>();

	/** The job configuration attached to this job. */
	private final Configuration jobConfiguration = new Configuration();

	/** Set of JAR files required to run this job. */
	private final List<Path> userJars = new ArrayList<Path>();

	/** Set of blob keys identifying the JAR files required to run this job. */
	private final List<BlobKey> userJarBlobKeys = new ArrayList<BlobKey>();

	/** ID of this job. May be set if a specific job ID is desired (e.g. session management). */
	private final JobID jobID;

	/** Name of this job. */
	private final String jobName;

	/** Configuration that defines which restart strategy to use for job recovery. */
	private RestartStrategies.RestartStrategyConfiguration restartStrategyConfiguration;
	

	/** The number of seconds after which the corresponding ExecutionGraph is removed at the
	 * job manager after it has been executed. */
	private long sessionTimeout = 0;

	/** flag to enable queued scheduling */
	private boolean allowQueuedScheduling;

	/** The mode in which the job is scheduled */
	private ScheduleMode scheduleMode = ScheduleMode.FROM_SOURCES;

	/** The settings for asynchronous snapshots */
	private JobSnapshottingSettings snapshotSettings;

	/** List of classpaths required to run this job. */
	private List<URL> classpaths = Collections.<URL>emptyList();

	// --------------------------------------------------------------------------------------------

	/**
	 * Constructs a new job graph with no name and a random job ID.
	 */
	public JobGraph() {
		this((String) null);
	}

	/**
	 * Constructs a new job graph with the given name and a random job ID.
	 *
	 * @param jobName The name of the job
	 */
	public JobGraph(String jobName) {
		this(null, jobName);
	}

	/**
	 * Constructs a new job graph with the given name, and a random job ID if {@code null} is supplied as the id.
	 *
	 * @param jobId The id of the job. A random ID is generated, if {@code null} is passed.
	 * @param jobName The name of the job.
	 */
	public JobGraph(JobID jobId, String jobName) {
		this.jobID = jobId == null ? new JobID() : jobId;
		this.jobName = jobName == null ? "(unnamed job)" : jobName;
	}

	/**
	 * Constructs a new job graph with no name, a random job ID, and the given vertices.
	 *
	 * @param vertices The vertices to add to the graph.
	 */
	public JobGraph(JobVertex... vertices) {
		this(null, vertices);
	}

	/**
	 * Constructs a new job graph with the given name and a random job ID.
	 *
	 * @param jobName The name of the job.
	 * @param vertices The vertices to add to the graph.
	 */
	public JobGraph(String jobName, JobVertex... vertices) {
		this(null, jobName, vertices);
	}

	/**
	 * Constructs a new job graph with the given name and vertices, and a random job ID if {@code null} is supplied as the id.
	 *
	 * @param jobId The id of the job. A random ID is generated, if {@code null} is passed.
	 * @param jobName The name of the job.
	 * @param vertices The vertices to add to the graph.
	 */
	public JobGraph(JobID jobId, String jobName, JobVertex... vertices) {
		this(jobId, jobName);

		for (JobVertex vertex : vertices) {
			addVertex(vertex);
		}
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Returns the ID of the job.
	 *
	 * @return the ID of the job
	 */
	public JobID getJobID() {
		return this.jobID;
	}

	/**
	 * Returns the name assigned to the job graph.
	 *
	 * @return the name assigned to the job graph
	 */
	public String getName() {
		return this.jobName;
	}

	/**
	 * Returns the configuration object for this job. Job-wide parameters should be set into that
	 * configuration object.
	 *
	 * @return The configuration object for this job.
	 */
	public Configuration getJobConfiguration() {
		return this.jobConfiguration;
	}

	/**
	 * Sets the restart strategy configuration. This configuration specifies the restart strategy
	 * to be used by the ExecutionGraph in case of a restart.
	 *
	 * @param restartStrategyConfiguration Restart strategy configuration to be set
	 */
	public void setRestartStrategyConfiguration(RestartStrategies.RestartStrategyConfiguration restartStrategyConfiguration) {
		this.restartStrategyConfiguration = restartStrategyConfiguration;
	}

	/**
	 * Gets the restart strategy configuration
	 *
	 * @return Restart strategy configuration to be used
	 */
	public RestartStrategies.RestartStrategyConfiguration getRestartStrategyConfiguration() {
		return restartStrategyConfiguration;
	}

	/**
	 * Gets the timeout after which the corresponding ExecutionGraph is removed at the
	 * job manager after it has been executed.
	 * @return a timeout as a long in seconds.
	 */
	public long getSessionTimeout() {
		return sessionTimeout;
	}

	/**
	 * Sets the timeout of the session in seconds. The timeout specifies how long a job will be kept
	 * in the job manager after it finishes.
	 * @param sessionTimeout The timeout in seconds
	 */
	public void setSessionTimeout(long sessionTimeout) {
		this.sessionTimeout = sessionTimeout;
	}

	public void setAllowQueuedScheduling(boolean allowQueuedScheduling) {
		this.allowQueuedScheduling = allowQueuedScheduling;
	}

	public boolean getAllowQueuedScheduling() {
		return allowQueuedScheduling;
	}

	public void setScheduleMode(ScheduleMode scheduleMode) {
		this.scheduleMode = scheduleMode;
	}

	public ScheduleMode getScheduleMode() {
		return scheduleMode;
	}

	/**
	 * Adds a new task vertex to the job graph if it is not already included.
	 *
	 * @param vertex
	 *        the new task vertex to be added
	 */
	public void addVertex(JobVertex vertex) {
		final JobVertexID id = vertex.getID();
		JobVertex previous = taskVertices.put(id, vertex);

		// if we had a prior association, restore and throw an exception
		if (previous != null) {
			taskVertices.put(id, previous);
			throw new IllegalArgumentException("The JobGraph already contains a vertex with that id.");
		}
	}

	/**
	 * Returns an Iterable to iterate all vertices registered with the job graph.
	 *
	 * @return an Iterable to iterate all vertices registered with the job graph
	 */
	public Iterable<JobVertex> getVertices() {
		return this.taskVertices.values();
	}

	/**
	 * Returns an array of all job vertices that are registered with the job graph. The order in which the vertices
	 * appear in the list is not defined.
	 *
	 * @return an array of all job vertices that are registered with the job graph
	 */
	public JobVertex[] getVerticesAsArray() {
		return this.taskVertices.values().toArray(new JobVertex[this.taskVertices.size()]);
	}

	/**
	 * Returns the number of all vertices.
	 *
	 * @return The number of all vertices.
	 */
	public int getNumberOfVertices() {
		return this.taskVertices.size();
	}

	/**
	 * Sets the settings for asynchronous snapshots. A value of {@code null} means that
	 * snapshotting is not enabled.
	 *
	 * @param settings The snapshot settings, or null, to disable snapshotting.
	 */
	public void setSnapshotSettings(JobSnapshottingSettings settings) {
		this.snapshotSettings = settings;
	}

	/**
	 * Gets the settings for asynchronous snapshots. This method returns null, when
	 * snapshotting is not enabled.
	 *
	 * @return The snapshot settings, or null, if snapshotting is not enabled.
	 */
	public JobSnapshottingSettings getSnapshotSettings() {
		return snapshotSettings;
	}

	/**
	 * Searches for a vertex with a matching ID and returns it.
	 *
	 * @param id
	 *        the ID of the vertex to search for
	 * @return the vertex with the matching ID or <code>null</code> if no vertex with such ID could be found
	 */
	public JobVertex findVertexByID(JobVertexID id) {
		return this.taskVertices.get(id);
	}

	/**
	 * Sets the classpaths required to run the job on a task manager.
	 *
	 * @param paths paths of the directories/JAR files required to run the job on a task manager
	 */
	public void setClasspaths(List<URL> paths) {
		classpaths = paths;
	}

	public List<URL> getClasspaths() {
		return classpaths;
	}

	/**
	 * Sets the savepoint path to rollback the deployment to.
	 *
	 * @param savepointPath The savepoint path
	 */
	public void setSavepointPath(String savepointPath) {
		if (savepointPath != null) {
			if (snapshotSettings == null) {
				throw new IllegalStateException("Checkpointing disabled");
			}
			else {
				snapshotSettings.setSavepointPath(savepointPath);
			}
		}
	}

	// --------------------------------------------------------------------------------------------

	public List<JobVertex> getVerticesSortedTopologicallyFromSources() throws InvalidProgramException {
		// early out on empty lists
		if (this.taskVertices.isEmpty()) {
			return Collections.emptyList();
		}

		List<JobVertex> sorted = new ArrayList<JobVertex>(this.taskVertices.size());
		Set<JobVertex> remaining = new LinkedHashSet<JobVertex>(this.taskVertices.values());

		// start by finding the vertices with no input edges
		// and the ones with disconnected inputs (that refer to some standalone data set)
		{
			Iterator<JobVertex> iter = remaining.iterator();
			while (iter.hasNext()) {
				JobVertex vertex = iter.next();

				if (vertex.hasNoConnectedInputs()) {
					sorted.add(vertex);
					iter.remove();
				}
			}
		}

		int startNodePos = 0;

		// traverse from the nodes that were added until we found all elements
		while (!remaining.isEmpty()) {

			// first check if we have more candidates to start traversing from. if not, then the
			// graph is cyclic, which is not permitted
			if (startNodePos >= sorted.size()) {
				throw new InvalidProgramException("The job graph is cyclic.");
			}

			JobVertex current = sorted.get(startNodePos++);
			addNodesThatHaveNoNewPredecessors(current, sorted, remaining);
		}

		return sorted;
	}

	private void addNodesThatHaveNoNewPredecessors(JobVertex start, List<JobVertex> target, Set<JobVertex> remaining) {

		// forward traverse over all produced data sets and all their consumers
		for (IntermediateDataSet dataSet : start.getProducedDataSets()) {
			for (JobEdge edge : dataSet.getConsumers()) {

				// a vertex can be added, if it has no predecessors that are still in the 'remaining' set
				JobVertex v = edge.getTarget();
				if (!remaining.contains(v)) {
					continue;
				}

				boolean hasNewPredecessors = false;

				for (JobEdge e : v.getInputs()) {
					// skip the edge through which we came
					if (e == edge) {
						continue;
					}

					IntermediateDataSet source = e.getSource();
					if (remaining.contains(source.getProducer())) {
						hasNewPredecessors = true;
						break;
					}
				}

				if (!hasNewPredecessors) {
					target.add(v);
					remaining.remove(v);
					addNodesThatHaveNoNewPredecessors(v, target, remaining);
				}
			}
		}
	}

	// --------------------------------------------------------------------------------------------
	//  Handling of attached JAR files
	// --------------------------------------------------------------------------------------------

	/**
	 * Adds the path of a JAR file required to run the job on a task manager.
	 *
	 * @param jar
	 *        path of the JAR file required to run the job on a task manager
	 */
	public void addJar(Path jar) {
		if (jar == null) {
			throw new IllegalArgumentException();
		}

		if (!userJars.contains(jar)) {
			userJars.add(jar);
		}
	}

	/**
	 * Adds the BLOB referenced by the key to the JobGraph's dependencies.
	 *
	 * @param key
	 *        the BLOB key of a JAR file required to run the job on a task manager
	 */
	public void addBlob(BlobKey key) {
		if (key == null) {
			throw new IllegalArgumentException();
		}

		if (!userJarBlobKeys.contains(key)) {
			userJarBlobKeys.add(key);
		}
	}

	/**
	 * Checks whether the JobGraph has user code JAR files attached.
	 *
	 * @return True, if the JobGraph has user code JAR files attached, false otherwise.
	 */
	public boolean hasUsercodeJarFiles() {
		return this.userJars.size() > 0;
	}

	/**
	 * Returns a set of BLOB keys referring to the JAR files required to run this job.
	 *
	 * @return set of BLOB keys referring to the JAR files required to run this job
	 */
	public List<BlobKey> getUserJarBlobKeys() {
		return this.userJarBlobKeys;
	}

	/**
	 * Uploads the previously added user JAR files to the job manager through the job manager's BLOB server.
	 *
	 * @param serverAddress
	 *        the network address of the BLOB server
	 * @throws IOException
	 *         thrown if an I/O error occurs during the upload
	 */
	public void uploadRequiredJarFiles(InetSocketAddress serverAddress) throws IOException {
		if (this.userJars.isEmpty()) {
			return;
		}

		BlobClient bc = null;
		try {
			bc = new BlobClient(serverAddress);

			for (final Path jar : this.userJars) {

				final FileSystem fs = jar.getFileSystem();
				FSDataInputStream is = null;
				try {
					is = fs.open(jar);
					final BlobKey key = bc.put(is);
					this.userJarBlobKeys.add(key);
				}
				finally {
					if (is != null) {
						is.close();
					}
				}
			}
		}
		finally {
			if (bc != null) {
				bc.close();
			}
		}
	}

	/**
	 * Gets the maximum parallelism of all operations in this job graph.
	 * @return The maximum parallelism of this job graph
	 */
	public int getMaximumParallelism() {
		int maxParallelism = -1;
		for (JobVertex vertex : taskVertices.values()) {
			maxParallelism = Math.max(vertex.getParallelism(), maxParallelism);
		}
		return maxParallelism;
	}

	@Override
	public String toString() {
		return "JobGraph(jobId: " + jobID + ")";
	}
}
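
Normally the JobGraph is assembled by StreamingJobGraphGenerator, but it can also be built by hand, which makes the vertex/edge structure and the topological sort above easier to see. A sketch with made-up vertex names; note that the exact signature of connectNewDataSetAsInput may differ between Flink versions:

import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;

public class JobGraphSketch {
	public static void main(String[] args) throws Exception {
		JobVertex source = new JobVertex("source");   // illustrative vertex names
		JobVertex sink = new JobVertex("sink");
		source.setParallelism(2);
		sink.setParallelism(2);

		// Wire the sink to consume the data set produced by the source.
		sink.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE);

		JobGraph jobGraph = new JobGraph("handmade-job", source, sink);

		// Sources come first; a cyclic graph would fail with InvalidProgramException.
		System.out.println(jobGraph.getVerticesSortedTopologicallyFromSources());
		System.out.println("max parallelism: " + jobGraph.getMaximumParallelism());
	}
}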

package org.apache.flink.streaming.api.graph;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileSourceFunction;
import org.apache.flink.streaming.api.transformations.CoFeedbackTransformation;
import org.apache.flink.streaming.api.transformations.FeedbackTransformation;
import org.apache.flink.streaming.api.transformations.OneInputTransformation;
import org.apache.flink.streaming.api.transformations.PartitionTransformation;
import org.apache.flink.streaming.api.transformations.SelectTransformation;
import org.apache.flink.streaming.api.transformations.SinkTransformation;
import org.apache.flink.streaming.api.transformations.SourceTransformation;
import org.apache.flink.streaming.api.transformations.SplitTransformation;
import org.apache.flink.streaming.api.transformations.StreamTransformation;
import org.apache.flink.streaming.api.transformations.TwoInputTransformation;
import org.apache.flink.streaming.api.transformations.UnionTransformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A generator that generates a {@link StreamGraph} from a graph of
 * {@link StreamTransformation StreamTransformations}.
 *
 * <p>
 * This traverses the tree of {@code StreamTransformations} starting from the sinks. At each
 * transformation we recursively transform the inputs, then create a node in the {@code StreamGraph}
 * and add edges from the input Nodes to our newly created node. The transformation methods
 * return the IDs of the nodes in the StreamGraph that represent the input transformation. Several
 * IDs can be returned to be able to deal with feedback transformations and unions.
 *
 * <p>
 * Partitioning, split/select and union don't create actual nodes in the {@code StreamGraph}. For
 * these, we create a virtual node in the {@code StreamGraph} that holds the specific property, i.e.
 * partitioning, selector and so on. When an edge is created from a virtual node to a downstream
 * node, the {@code StreamGraph} resolves the ID of the original node and creates an edge
 * in the graph with the desired property. For example, if you have this graph:
 *
 * <pre>
 *     Map-1 -> HashPartition-2 -> Map-3
 * </pre>
 *
 * where the numbers represent transformation IDs. We first recurse all the way down. {@code Map-1}
 * is transformed, i.e. we create a {@code StreamNode} with ID 1. Then we transform the
 * {@code HashPartition}; for this, we create a virtual node with ID 4 that holds the property
 * {@code HashPartition}. This transformation returns the ID 4. Then we transform the {@code Map-3}.
 * We add the edge {@code 4 -> 3}. The {@code StreamGraph} resolves the actual node with ID 1 and
 * creates an edge {@code 1 -> 3} with the property HashPartition.
 */
@Internal
public class StreamGraphGenerator {

	private static final Logger LOG = LoggerFactory.getLogger(StreamGraphGenerator.class);

	// The StreamGraph that is being built, this is initialized at the beginning.
	private StreamGraph streamGraph;

	private final StreamExecutionEnvironment env;

	// This is used to assign a unique ID to iteration source/sink
	protected static Integer iterationIdCounter = 0;
	public static int getNewIterationNodeId() {
		iterationIdCounter--;
		return iterationIdCounter;
	}

	// Keep track of which Transforms we have already transformed, this is necessary because
	// we have loops, i.e. feedback edges.
	private Map<StreamTransformation<?>, Collection<Integer>> alreadyTransformed;


	/**
	 * Private constructor. The generator should only be invoked using {@link #generate}.
	 */
	private StreamGraphGenerator(StreamExecutionEnvironment env) {
		this.streamGraph = new StreamGraph(env);
		this.streamGraph.setChaining(env.isChainingEnabled());
		this.streamGraph.setStateBackend(env.getStateBackend());
		this.env = env;
		this.alreadyTransformed = new HashMap<>();
	}

	/**
	 * Generates a {@code StreamGraph} by traversing the graph of {@code StreamTransformations}
	 * starting from the given transformations.
	 *
	 * @param env The {@code StreamExecutionEnvironment} that is used to set some parameters of the
	 *            job
	 * @param transformations The transformations starting from which to transform the graph
	 *
	 * @return The generated {@code StreamGraph}
	 */
	public static StreamGraph generate(StreamExecutionEnvironment env, List<StreamTransformation<?>> transformations) {
		return new StreamGraphGenerator(env).generateInternal(transformations);
	}

	/**
	 * This starts the actual transformation, beginning from the sinks.
	 */
	private StreamGraph generateInternal(List<StreamTransformation<?>> transformations) {
		for (StreamTransformation<?> transformation: transformations) {
			transform(transformation);
		}
		return streamGraph;
	}

	/**
	 * Transforms one {@code StreamTransformation}.
	 *
	 * <p>
	 * This checks whether we already transformed it and exits early in that case. If not it
	 * delegates to one of the transformation specific methods.
	 */
	private Collection<Integer> transform(StreamTransformation<?> transform) {

		if (alreadyTransformed.containsKey(transform)) {
			return alreadyTransformed.get(transform);
		}

		LOG.debug("Transforming " + transform);

		// call at least once to trigger exceptions about MissingTypeInfo
		transform.getOutputType();

		Collection<Integer> transformedIds;
		if (transform instanceof OneInputTransformation<?, ?>) {
			transformedIds = transformOnInputTransform((OneInputTransformation<?, ?>) transform);
		} else if (transform instanceof TwoInputTransformation<?, ?, ?>) {
			transformedIds = transformTwoInputTransform((TwoInputTransformation<?, ?, ?>) transform);
		} else if (transform instanceof SourceTransformation<?>) {
			transformedIds = transformSource((SourceTransformation<?>) transform);
		} else if (transform instanceof SinkTransformation<?>) {
			transformedIds = transformSink((SinkTransformation<?>) transform);
		} else if (transform instanceof UnionTransformation<?>) {
			transformedIds = transformUnion((UnionTransformation<?>) transform);
		} else if (transform instanceof SplitTransformation<?>) {
			transformedIds = transformSplit((SplitTransformation<?>) transform);
		} else if (transform instanceof SelectTransformation<?>) {
			transformedIds = transformSelect((SelectTransformation<?>) transform);
		} else if (transform instanceof FeedbackTransformation<?>) {
			transformedIds = transformFeedback((FeedbackTransformation<?>) transform);
		} else if (transform instanceof CoFeedbackTransformation<?>) {
			transformedIds = transformCoFeedback((CoFeedbackTransformation<?>) transform);
		} else if (transform instanceof PartitionTransformation<?>) {
			transformedIds = transformPartition((PartitionTransformation<?>) transform);
		} else {
			throw new IllegalStateException("Unknown transformation: " + transform);
		}

		// need this check because the iterate transformation adds itself before
		// transforming the feedback edges
		if (!alreadyTransformed.containsKey(transform)) {
			alreadyTransformed.put(transform, transformedIds);
		}

		if (transform.getBufferTimeout() > 0) {
			streamGraph.setBufferTimeout(transform.getId(), transform.getBufferTimeout());
		}
		if (transform.getUid() != null) {
			streamGraph.setTransformationId(transform.getId(), transform.getUid());
		}

		return transformedIds;
	}

	/**
	 * Transforms a {@code UnionTransformation}.
	 *
	 * <p>
	 * This is easy, we only have to transform the inputs and return all the IDs in a list so
	 * that downstream operations can connect to all upstream nodes.
	 */
	private <T> Collection<Integer> transformUnion(UnionTransformation<T> union) {
		List<StreamTransformation<T>> inputs = union.getInputs();
		List<Integer> resultIds = new ArrayList<>();

		for (StreamTransformation<T> input: inputs) {
			resultIds.addAll(transform(input));
		}

		return resultIds;
	}

	/**
	 * Transforms a {@code PartitionTransformation}.
	 *
	 * <p>
	 * For this we create a virtual node in the {@code StreamGraph} that holds the partition
	 * property. @see StreamGraphGenerator
	 */
	private <T> Collection<Integer> transformPartition(PartitionTransformation<T> partition) {
		StreamTransformation<T> input = partition.getInput();
		List<Integer> resultIds = new ArrayList<>();

		Collection<Integer> transformedIds = transform(input);
		for (Integer transformedId: transformedIds) {
			int virtualId = StreamTransformation.getNewNodeId();
			streamGraph.addVirtualPartitionNode(transformedId, virtualId, partition.getPartitioner());
			resultIds.add(virtualId);
		}

		return resultIds;
	}

	/**
	 * Transforms a {@code SplitTransformation}.
	 *
	 * <p>
	 * We add the output selector to previously transformed nodes.
	 */
	private <T> Collection<Integer> transformSplit(SplitTransformation<T> split) {

		StreamTransformation<T> input = split.getInput();
		Collection<Integer> resultIds = transform(input);

		// the recursive transform call might have transformed this already
		if (alreadyTransformed.containsKey(split)) {
			return alreadyTransformed.get(split);
		}

		for (int inputId : resultIds) {
			streamGraph.addOutputSelector(inputId, split.getOutputSelector());
		}


		return resultIds;
	}

	/**
	 * Transforms a {@code SelectTransformation}.
	 *
	 * <p>
	 * For this we create a virtual node in the {@code StreamGraph} that holds the selected names.
	 * @see org.apache.flink.streaming.api.graph.StreamGraphGenerator
	 */
	private <T> Collection<Integer> transformSelect(SelectTransformation<T> select) {
		StreamTransformation<T> input = select.getInput();
		Collection<Integer> resultIds = transform(input);


		// the recursive transform might have already transformed this
		if (alreadyTransformed.containsKey(select)) {
			return alreadyTransformed.get(select);
		}

		List<Integer> virtualResultIds = new ArrayList<>();

		for (int inputId : resultIds) {
			int virtualId = StreamTransformation.getNewNodeId();
			streamGraph.addVirtualSelectNode(inputId, virtualId, select.getSelectedNames());
			virtualResultIds.add(virtualId);
		}
		return virtualResultIds;
	}

	/**
	 * Transforms a {@code FeedbackTransformation}.
	 *
	 * <p>
	 * This will recursively transform the input and the feedback edges. We return the concatenation
	 * of the input IDs and the feedback IDs so that downstream operations can be wired to both.
	 *
	 * <p>
	 * This is responsible for creating the IterationSource and IterationSink which
	 * are used to feed back the elements.
	 */
	private <T> Collection<Integer> transformFeedback(FeedbackTransformation<T> iterate) {

		if (iterate.getFeedbackEdges().size() <= 0) {
			throw new IllegalStateException("Iteration " + iterate + " does not have any feedback edges.");
		}

		StreamTransformation<T> input = iterate.getInput();
		List<Integer> resultIds = new ArrayList<>();

		// first transform the input stream(s) and store the result IDs
		Collection<Integer> inputIds = transform(input);
		resultIds.addAll(inputIds);

		// the recursive transform might have already transformed this
		if (alreadyTransformed.containsKey(iterate)) {
			return alreadyTransformed.get(iterate);
		}

		// create the fake iteration source/sink pair
		Tuple2<StreamNode, StreamNode> itSourceAndSink = streamGraph.createIterationSourceAndSink(
				iterate.getId(),
				getNewIterationNodeId(),
				getNewIterationNodeId(),
				iterate.getWaitTime(),
				iterate.getParallelism());

		StreamNode itSource = itSourceAndSink.f0;
		StreamNode itSink = itSourceAndSink.f1;

		// We set the proper serializers for the sink/source
		streamGraph.setSerializers(itSource.getId(), null, null, iterate.getOutputType().createSerializer(env.getConfig()));
		streamGraph.setSerializers(itSink.getId(), iterate.getOutputType().createSerializer(env.getConfig()), null, null);

		// also add the feedback source ID to the result IDs, so that downstream operators will
		// add both as input
		resultIds.add(itSource.getId());

		// add the iterate to the already-seen-set with the result IDs, so that we can transform
		// the feedback edges and let them stop when encountering the iterate node
		alreadyTransformed.put(iterate, resultIds);

		// so that we can determine the slot sharing group from all feedback edges
		List<Integer> allFeedbackIds = new ArrayList<>();

		for (StreamTransformation<T> feedbackEdge : iterate.getFeedbackEdges()) {
			Collection<Integer> feedbackIds = transform(feedbackEdge);
			allFeedbackIds.addAll(feedbackIds);
			for (Integer feedbackId: feedbackIds) {
				streamGraph.addEdge(feedbackId,
						itSink.getId(),
						0
				);
			}
		}

		String slotSharingGroup = determineSlotSharingGroup(null, allFeedbackIds);

		itSink.setSlotSharingGroup(slotSharingGroup);
		itSource.setSlotSharingGroup(slotSharingGroup);

		return resultIds;
	}

	/**
	 * Transforms a {@code CoFeedbackTransformation}.
	 *
	 * <p>
	 * This will only transform feedback edges; the result of this transform will be wired
	 * to the second input of a Co-Transform. The original input is wired directly to the first
	 * input of the downstream Co-Transform.
	 *
	 * <p>
	 * This is responsible for creating the IterationSource and IterationSink which
	 * are used to feed back the elements.
	 */
	private <F> Collection<Integer> transformCoFeedback(CoFeedbackTransformation<F> coIterate) {

		// For Co-Iteration we don't need to transform the input and wire the input to the
		// head operator by returning the input IDs; the input is directly wired to the left
		// input of the co-operation. This transform only needs to return the IDs of the feedback
		// edges, since they need to be wired to the second input of the co-operation.

		// create the fake iteration source/sink pair
		Tuple2<StreamNode, StreamNode> itSourceAndSink = streamGraph.createIterationSourceAndSink(
				coIterate.getId(),
				getNewIterationNodeId(),
				getNewIterationNodeId(),
				coIterate.getWaitTime(),
				coIterate.getParallelism());

		StreamNode itSource = itSourceAndSink.f0;
		StreamNode itSink = itSourceAndSink.f1;

		// We set the proper serializers for the sink/source
		streamGraph.setSerializers(itSource.getId(), null, null, coIterate.getOutputType().createSerializer(env.getConfig()));
		streamGraph.setSerializers(itSink.getId(), coIterate.getOutputType().createSerializer(env.getConfig()), null, null);

		Collection<Integer> resultIds = Collections.singleton(itSource.getId());

		// add the iterate to the already-seen-set with the result IDs, so that we can transform
		// the feedback edges and let them stop when encountering the iterate node
		alreadyTransformed.put(coIterate, resultIds);

		// so that we can determine the slot sharing group from all feedback edges
		List<Integer> allFeedbackIds = new ArrayList<>();

		for (StreamTransformation<F> feedbackEdge : coIterate.getFeedbackEdges()) {
			Collection<Integer> feedbackIds = transform(feedbackEdge);
			allFeedbackIds.addAll(feedbackIds);
			for (Integer feedbackId: feedbackIds) {
				streamGraph.addEdge(feedbackId,
						itSink.getId(),
						0
				);
			}
		}

		String slotSharingGroup = determineSlotSharingGroup(null, allFeedbackIds);

		itSink.setSlotSharingGroup(slotSharingGroup);
		itSource.setSlotSharingGroup(slotSharingGroup);

		return Collections.singleton(itSource.getId());
	}

	/**
	 * Transforms a {@code SourceTransformation}.
	 */
	private <T> Collection<Integer> transformSource(SourceTransformation<T> source) {
		String slotSharingGroup = determineSlotSharingGroup(source.getSlotSharingGroup(), new ArrayList<Integer>());
		streamGraph.addSource(source.getId(),
				slotSharingGroup,
				source.getOperator(),
				null,
				source.getOutputType(),
				"Source: " + source.getName());
		if (source.getOperator().getUserFunction() instanceof FileSourceFunction) {
			FileSourceFunction<T> fs = (FileSourceFunction<T>) source.getOperator().getUserFunction();
			streamGraph.setInputFormat(source.getId(), fs.getFormat());
		}
		streamGraph.setParallelism(source.getId(), source.getParallelism());
		return Collections.singleton(source.getId());
	}

	/**
	 * Transforms a {@code SinkTransformation}.
	 */
	private <T> Collection<Integer> transformSink(SinkTransformation<T> sink) {

		Collection<Integer> inputIds = transform(sink.getInput());

		String slotSharingGroup = determineSlotSharingGroup(sink.getSlotSharingGroup(), inputIds);

		streamGraph.addSink(sink.getId(),
				slotSharingGroup,
				sink.getOperator(),
				sink.getInput().getOutputType(),
				null,
				"Sink: " + sink.getName());

		streamGraph.setParallelism(sink.getId(), sink.getParallelism());

		for (Integer inputId: inputIds) {
			streamGraph.addEdge(inputId,
					sink.getId(),
					0
			);
		}


		if (sink.getStateKeySelector() != null) {
			TypeSerializer<?> keySerializer = sink.getStateKeyType().createSerializer(env.getConfig());
			streamGraph.setOneInputStateKey(sink.getId(), sink.getStateKeySelector(), keySerializer);
		}

		return Collections.emptyList();
	}

	/**
	 * Transforms a {@code OneInputTransformation}.
	 *
	 * <p>
	 * This recursively transforms the inputs, creates a new {@code StreamNode} in the graph and
	 * wires the inputs to this new node.
	 */
	private <IN, OUT> Collection<Integer> transformOnInputTransform(OneInputTransformation<IN, OUT> transform) {

		Collection<Integer> inputIds = transform(transform.getInput());

		// the recursive call might have already transformed this
		if (alreadyTransformed.containsKey(transform)) {
			return alreadyTransformed.get(transform);
		}

		String slotSharingGroup = determineSlotSharingGroup(transform.getSlotSharingGroup(), inputIds);

		streamGraph.addOperator(transform.getId(),
				slotSharingGroup,
				transform.getOperator(),
				transform.getInputType(),
				transform.getOutputType(),
				transform.getName());

		if (transform.getStateKeySelector() != null) {
			TypeSerializer<?> keySerializer = transform.getStateKeyType().createSerializer(env.getConfig());
			streamGraph.setOneInputStateKey(transform.getId(), transform.getStateKeySelector(), keySerializer);
		}

		streamGraph.setParallelism(transform.getId(), transform.getParallelism());

		for (Integer inputId: inputIds) {
			streamGraph.addEdge(inputId, transform.getId(), 0);
		}

		return Collections.singleton(transform.getId());
	}

	/**
	 * Transforms a {@code TwoInputTransformation}.
	 *
	 * <p>
	 * This recursively transforms the inputs, creates a new {@code StreamNode} in the graph and
	 * wires the inputs to this new node.
	 */
	private <IN1, IN2, OUT> Collection<Integer> transformTwoInputTransform(TwoInputTransformation<IN1, IN2, OUT> transform) {

		Collection<Integer> inputIds1 = transform(transform.getInput1());
		Collection<Integer> inputIds2 = transform(transform.getInput2());

		// the recursive call might have already transformed this
		if (alreadyTransformed.containsKey(transform)) {
			return alreadyTransformed.get(transform);
		}

		List<Integer> allInputIds = new ArrayList<>();
		allInputIds.addAll(inputIds1);
		allInputIds.addAll(inputIds2);

		String slotSharingGroup = determineSlotSharingGroup(transform.getSlotSharingGroup(), allInputIds);

		streamGraph.addCoOperator(
				transform.getId(),
				slotSharingGroup,
				transform.getOperator(),
				transform.getInputType1(),
				transform.getInputType2(),
				transform.getOutputType(),
				transform.getName());

		if (transform.getStateKeySelector1() != null) {
			TypeSerializer<?> keySerializer = transform.getStateKeyType().createSerializer(env.getConfig());
			streamGraph.setTwoInputStateKey(transform.getId(), transform.getStateKeySelector1(), transform.getStateKeySelector2(), keySerializer);
		}


		streamGraph.setParallelism(transform.getId(), transform.getParallelism());

		for (Integer inputId: inputIds1) {
			streamGraph.addEdge(inputId,
					transform.getId(),
					1
			);
		}

		for (Integer inputId: inputIds2) {
			streamGraph.addEdge(inputId,
					transform.getId(),
					2
			);
		}

		return Collections.singleton(transform.getId());
	}

	/**
	 * Determines the slot sharing group for an operation based on the slot sharing group set by
	 * the user and the slot sharing groups of the inputs.
	 *
	 * <p>If the user specifies a group name, this is taken as is. If nothing is specified and
	 * the input operations all have the same group name then this name is taken. Otherwise the
	 * default group is chosen.
	 *
	 * @param specifiedGroup The group specified by the user.
	 * @param inputIds The IDs of the input operations.
	 */
	private String determineSlotSharingGroup(String specifiedGroup, Collection<Integer> inputIds) {
		if (specifiedGroup != null) {
			return specifiedGroup;
		} else {
			String inputGroup = null;
			for (int id: inputIds) {
				String inputGroupCandidate = streamGraph.getSlotSharingGroup(id);
				if (inputGroup == null) {
					inputGroup = inputGroupCandidate;
				} else if (!inputGroup.equals(inputGroupCandidate)) {
					return "default";
				}
			}
			return inputGroup == null ? "default" : inputGroup;
		}
	}
}
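
To connect this back to env.execute(): below is a minimal sketch of how the transformations collected on the environment reach StreamGraphGenerator.generate(). It assumes the Flink 1.x APIs quoted in this post, where env.getStreamGraph() delegates to StreamGraphGenerator.generate(env, transformations); the word-uppercasing pipeline itself is purely illustrative.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.graph.StreamGraph;

public class StreamGraphSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// every API call below goes through transform(...) / addOperator(...),
		// which appends a StreamTransformation to the environment's internal list
		DataStream<String> upperCased = env
				.fromElements("flink", "stream", "graph")
				.map(new MapFunction<String, String>() {
					@Override
					public String map(String value) {
						return value.toUpperCase();
					}
				});
		upperCased.print();

		// getStreamGraph() hands that list to StreamGraphGenerator.generate(env, transformations),
		// which walks it (generateInternal -> transform) and builds the StreamGraph DAG shown above
		StreamGraph streamGraph = env.getStreamGraph();
		System.out.println("StreamGraph built for job: " + streamGraph.getJobName());
	}
}

Here print() registers a SinkTransformation via addOperator() and map() registers a OneInputTransformation the same way; transform() then recurses from each entry to its inputs (reaching the source), and the alreadyTransformed map prevents any node from being added twice.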

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.graph;

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;

import org.apache.commons.lang3.StringUtils;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.operators.util.UserCodeObjectWrapper;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;
import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.runtime.jobgraph.InputFormatVertex;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.runtime.jobgraph.ScheduleMode;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup;
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
import org.apache.flink.runtime.operators.util.TaskConfig;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator;
import org.apache.flink.streaming.api.operators.ChainingStrategy;
import org.apache.flink.streaming.api.operators.StreamOperator;
import org.apache.flink.streaming.api.transformations.StreamTransformation;
import org.apache.flink.streaming.runtime.partitioner.ForwardPartitioner;
import org.apache.flink.streaming.runtime.partitioner.RescalePartitioner;
import org.apache.flink.streaming.runtime.partitioner.StreamPartitioner;
import org.apache.flink.streaming.runtime.tasks.StreamIterationHead;
import org.apache.flink.streaming.runtime.tasks.StreamIterationTail;
import org.apache.flink.util.InstantiationUtil;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;

import static org.apache.flink.util.StringUtils.byteToHexString;

@Internal
public class StreamingJobGraphGenerator {

	private static final Logger LOG = LoggerFactory.getLogger(StreamingJobGraphGenerator.class);

	/**
	 * Restart delay used for the FixedDelayRestartStrategy in case checkpointing was enabled but
	 * no restart strategy has been specified.
	 */
	private static final long DEFAULT_RESTART_DELAY = 10000L;

	private StreamGraph streamGraph;

	private Map<Integer, JobVertex> jobVertices;
	private JobGraph jobGraph;
	private Collection<Integer> builtVertices;

	private List<StreamEdge> physicalEdgesInOrder;

	private Map<Integer, Map<Integer, StreamConfig>> chainedConfigs;

	private Map<Integer, StreamConfig> vertexConfigs;
	private Map<Integer, String> chainedNames;

	public StreamingJobGraphGenerator(StreamGraph streamGraph) {
		this.streamGraph = streamGraph;
	}

	private void init() {
		this.jobVertices = new HashMap<>();
		this.builtVertices = new HashSet<>();
		this.chainedConfigs = new HashMap<>();
		this.vertexConfigs = new HashMap<>();
		this.chainedNames = new HashMap<>();
		this.physicalEdgesInOrder = new ArrayList<>();
	}

	public JobGraph createJobGraph() {
		jobGraph = new JobGraph(streamGraph.getJobName());

		// make sure that all vertices start immediately
		jobGraph.setScheduleMode(ScheduleMode.ALL);

		init();

		// Generate deterministic hashes for the nodes in order to identify them across
		// submission iff they didn't change.
		Map<Integer, byte[]> hashes = traverseStreamGraphAndGenerateHashes();

		setChaining(hashes);

		setPhysicalEdges();

		setSlotSharing();
		
		configureCheckpointing();

		configureRestartStrategy();

		try {
			InstantiationUtil.writeObjectToConfig(this.streamGraph.getExecutionConfig(), this.jobGraph.getJobConfiguration(), ExecutionConfig.CONFIG_KEY);
		} catch (IOException e) {
			throw new RuntimeException("Config object could not be written to Job Configuration: ", e);
		}
		
		return jobGraph;
	}

	private void setPhysicalEdges() {
		Map<Integer, List<StreamEdge>> physicalInEdgesInOrder = new HashMap<Integer, List<StreamEdge>>();

		for (StreamEdge edge : physicalEdgesInOrder) {
			int target = edge.getTargetId();

			List<StreamEdge> inEdges = physicalInEdgesInOrder.get(target);

			// create if not set
			if (inEdges == null) {
				inEdges = new ArrayList<>();
				physicalInEdgesInOrder.put(target, inEdges);
			}

			inEdges.add(edge);
		}

		for (Map.Entry<Integer, List<StreamEdge>> inEdges : physicalInEdgesInOrder.entrySet()) {
			int vertex = inEdges.getKey();
			List<StreamEdge> edgeList = inEdges.getValue();

			vertexConfigs.get(vertex).setInPhysicalEdges(edgeList);
		}
	}

	/**
	 * Sets up task chains from the source {@link StreamNode} instances.
	 *
	 * <p>This will recursively create all {@link JobVertex} instances.
	 */
	private void setChaining(Map<Integer, byte[]> hashes) {
		for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
			createChain(sourceNodeId, sourceNodeId, hashes);
		}
	}

	private List<StreamEdge> createChain(
			Integer startNodeId,
			Integer currentNodeId,
			Map<Integer, byte[]> hashes) {

		if (!builtVertices.contains(startNodeId)) {

			List<StreamEdge> transitiveOutEdges = new ArrayList<StreamEdge>();

			List<StreamEdge> chainableOutputs = new ArrayList<StreamEdge>();
			List<StreamEdge> nonChainableOutputs = new ArrayList<StreamEdge>();

			for (StreamEdge outEdge : streamGraph.getStreamNode(currentNodeId).getOutEdges()) {
				if (isChainable(outEdge)) {
					chainableOutputs.add(outEdge);
				} else {
					nonChainableOutputs.add(outEdge);
				}
			}

			for (StreamEdge chainable : chainableOutputs) {
				transitiveOutEdges.addAll(createChain(startNodeId, chainable.getTargetId(), hashes));
			}

			for (StreamEdge nonChainable : nonChainableOutputs) {
				transitiveOutEdges.add(nonChainable);
				createChain(nonChainable.getTargetId(), nonChainable.getTargetId(), hashes);
			}

			chainedNames.put(currentNodeId, createChainedName(currentNodeId, chainableOutputs));

			StreamConfig config = currentNodeId.equals(startNodeId)
					? createJobVertex(startNodeId, hashes)
					: new StreamConfig(new Configuration());

			setVertexConfig(currentNodeId, config, chainableOutputs, nonChainableOutputs);

			if (currentNodeId.equals(startNodeId)) {

				config.setChainStart();
				config.setOutEdgesInOrder(transitiveOutEdges);
				config.setOutEdges(streamGraph.getStreamNode(currentNodeId).getOutEdges());

				for (StreamEdge edge : transitiveOutEdges) {
					connect(startNodeId, edge);
				}

				config.setTransitiveChainedTaskConfigs(chainedConfigs.get(startNodeId));

			} else {

				Map<Integer, StreamConfig> chainedConfs = chainedConfigs.get(startNodeId);

				if (chainedConfs == null) {
					chainedConfigs.put(startNodeId, new HashMap<Integer, StreamConfig>());
				}
				chainedConfigs.get(startNodeId).put(currentNodeId, config);
			}

			return transitiveOutEdges;

		} else {
			return new ArrayList<>();
		}
	}

	private String createChainedName(Integer vertexID, List<StreamEdge> chainedOutputs) {
		String operatorName = streamGraph.getStreamNode(vertexID).getOperatorName();
		if (chainedOutputs.size() > 1) {
			List<String> outputChainedNames = new ArrayList<>();
			for (StreamEdge chainable : chainedOutputs) {
				outputChainedNames.add(chainedNames.get(chainable.getTargetId()));
			}
			return operatorName + " -> (" + StringUtils.join(outputChainedNames, ", ") + ")";
		} else if (chainedOutputs.size() == 1) {
			return operatorName + " -> " + chainedNames.get(chainedOutputs.get(0).getTargetId());
		} else {
			return operatorName;
		}
	}

	private StreamConfig createJobVertex(
			Integer streamNodeId,
			Map<Integer, byte[]> hashes) {

		JobVertex jobVertex;
		StreamNode streamNode = streamGraph.getStreamNode(streamNodeId);

		byte[] hash = hashes.get(streamNodeId);

		if (hash == null) {
			throw new IllegalStateException("Cannot find node hash. " +
					"Did you generate them before calling this method?");
		}

		JobVertexID jobVertexId = new JobVertexID(hash);

		if (streamNode.getInputFormat() != null) {
			jobVertex = new InputFormatVertex(
					chainedNames.get(streamNodeId),
					jobVertexId);
			TaskConfig taskConfig = new TaskConfig(jobVertex.getConfiguration());
			taskConfig.setStubWrapper(new UserCodeObjectWrapper<Object>(streamNode.getInputFormat()));
		} else {
			jobVertex = new JobVertex(
					chainedNames.get(streamNodeId),
					jobVertexId);
		}

		jobVertex.setInvokableClass(streamNode.getJobVertexClass());

		int parallelism = streamNode.getParallelism();

		if (parallelism > 0) {
			jobVertex.setParallelism(parallelism);
		}

		if (LOG.isDebugEnabled()) {
			LOG.debug("Parallelism set: {} for {}", parallelism, streamNodeId);
		}

		jobVertices.put(streamNodeId, jobVertex);
		builtVertices.add(streamNodeId);
		jobGraph.addVertex(jobVertex);

		return new StreamConfig(jobVertex.getConfiguration());
	}

	@SuppressWarnings("unchecked")
	private void setVertexConfig(Integer vertexID, StreamConfig config,
			List<StreamEdge> chainableOutputs, List<StreamEdge> nonChainableOutputs) {

		StreamNode vertex = streamGraph.getStreamNode(vertexID);

		config.setVertexID(vertexID);
		config.setBufferTimeout(vertex.getBufferTimeout());

		config.setTypeSerializerIn1(vertex.getTypeSerializerIn1());
		config.setTypeSerializerIn2(vertex.getTypeSerializerIn2());
		config.setTypeSerializerOut(vertex.getTypeSerializerOut());

		config.setStreamOperator(vertex.getOperator());
		config.setOutputSelectors(vertex.getOutputSelectors());

		config.setNumberOfOutputs(nonChainableOutputs.size());
		config.setNonChainedOutputs(nonChainableOutputs);
		config.setChainedOutputs(chainableOutputs);

		config.setTimeCharacteristic(streamGraph.getEnvironment().getStreamTimeCharacteristic());
		
		final CheckpointConfig ceckpointCfg = streamGraph.getCheckpointConfig();
		
		config.setStateBackend(streamGraph.getStateBackend());
		config.setCheckpointingEnabled(ceckpointCfg.isCheckpointingEnabled());
		if (ceckpointCfg.isCheckpointingEnabled()) {
			config.setCheckpointMode(ceckpointCfg.getCheckpointingMode());
		}
		else {
			// the "at-least-once" input handler is slightly cheaper (in the absence of checkpoints),
			// so we use that one if checkpointing is not enabled
			config.setCheckpointMode(CheckpointingMode.AT_LEAST_ONCE);
		}
		config.setStatePartitioner(0, vertex.getStatePartitioner1());
		config.setStatePartitioner(1, vertex.getStatePartitioner2());
		config.setStateKeySerializer(vertex.getStateKeySerializer());
		
		Class<? extends AbstractInvokable> vertexClass = vertex.getJobVertexClass();

		if (vertexClass.equals(StreamIterationHead.class)
				|| vertexClass.equals(StreamIterationTail.class)) {
			config.setIterationId(streamGraph.getBrokerID(vertexID));
			config.setIterationWaitTime(streamGraph.getLoopTimeout(vertexID));
		}

		List<StreamEdge> allOutputs = new ArrayList<StreamEdge>(chainableOutputs);
		allOutputs.addAll(nonChainableOutputs);

		vertexConfigs.put(vertexID, config);
	}

	private void connect(Integer headOfChain, StreamEdge edge) {

		physicalEdgesInOrder.add(edge);

		Integer downStreamvertexID = edge.getTargetId();

		JobVertex headVertex = jobVertices.get(headOfChain);
		JobVertex downStreamVertex = jobVertices.get(downStreamvertexID);

		StreamConfig downStreamConfig = new StreamConfig(downStreamVertex.getConfiguration());

		downStreamConfig.setNumberOfInputs(downStreamConfig.getNumberOfInputs() + 1);

		StreamPartitioner<?> partitioner = edge.getPartitioner();
		if (partitioner instanceof ForwardPartitioner) {
			downStreamVertex.connectNewDataSetAsInput(
				headVertex,
				DistributionPattern.POINTWISE,
				ResultPartitionType.PIPELINED,
				true);
		} else if (partitioner instanceof RescalePartitioner){
			downStreamVertex.connectNewDataSetAsInput(
				headVertex,
				DistributionPattern.POINTWISE,
				ResultPartitionType.PIPELINED,
				true);
		} else {
			downStreamVertex.connectNewDataSetAsInput(
					headVertex,
					DistributionPattern.ALL_TO_ALL,
					ResultPartitionType.PIPELINED,
					true);
		}

		if (LOG.isDebugEnabled()) {
			LOG.debug("CONNECTED: {} - {} -> {}", partitioner.getClass().getSimpleName(),
					headOfChain, downStreamvertexID);
		}
	}

	private boolean isChainable(StreamEdge edge) {
		StreamNode upStreamVertex = edge.getSourceVertex();
		StreamNode downStreamVertex = edge.getTargetVertex();

		StreamOperator<?> headOperator = upStreamVertex.getOperator();
		StreamOperator<?> outOperator = downStreamVertex.getOperator();

		return downStreamVertex.getInEdges().size() == 1
				&& outOperator != null
				&& headOperator != null
				&& upStreamVertex.isSameSlotSharingGroup(downStreamVertex)
				&& outOperator.getChainingStrategy() == ChainingStrategy.ALWAYS
				&& (headOperator.getChainingStrategy() == ChainingStrategy.HEAD ||
					headOperator.getChainingStrategy() == ChainingStrategy.ALWAYS)
				&& (edge.getPartitioner() instanceof ForwardPartitioner)
				&& upStreamVertex.getParallelism() == downStreamVertex.getParallelism()
				&& streamGraph.isChainingEnabled();
	}

	private void setSlotSharing() {

		Map<String, SlotSharingGroup> slotSharingGroups = new HashMap<>();

		for (Entry<Integer, JobVertex> entry : jobVertices.entrySet()) {

			String slotSharingGroup = streamGraph.getStreamNode(entry.getKey()).getSlotSharingGroup();

			SlotSharingGroup group = slotSharingGroups.get(slotSharingGroup);
			if (group == null) {
				group = new SlotSharingGroup();
				slotSharingGroups.put(slotSharingGroup, group);
			}
			entry.getValue().setSlotSharingGroup(group);
		}

		for (Tuple2<StreamNode, StreamNode> pair : streamGraph.getIterationSourceSinkPairs()) {

			CoLocationGroup ccg = new CoLocationGroup();

			JobVertex source = jobVertices.get(pair.f0.getId());
			JobVertex sink = jobVertices.get(pair.f1.getId());

			ccg.addVertex(source);
			ccg.addVertex(sink);
			source.updateCoLocationGroup(ccg);
			sink.updateCoLocationGroup(ccg);
		}

	}
	
	private void configureCheckpointing() {
		CheckpointConfig cfg = streamGraph.getCheckpointConfig();
		
		if (cfg.isCheckpointingEnabled()) {
			long interval = cfg.getCheckpointInterval();
			if (interval < 1) {
				throw new IllegalArgumentException("The checkpoint interval must be positive");
			}

			// collect the vertices that receive "trigger checkpoint" messages.
			// currently, these are all the sources
			List<JobVertexID> triggerVertices = new ArrayList<>();

			// collect the vertices that need to acknowledge the checkpoint
			// currently, these are all vertices
			List<JobVertexID> ackVertices = new ArrayList<>(jobVertices.size());

			// collect the vertices that receive "commit checkpoint" messages
			// currently, these are all vertices
			List<JobVertexID> commitVertices = new ArrayList<>();
			
			for (JobVertex vertex : jobVertices.values()) {
				if (vertex.isInputVertex()) {
					triggerVertices.add(vertex.getID());
				}
				// TODO: add check whether the user function implements the checkpointing interface
				commitVertices.add(vertex.getID());
				ackVertices.add(vertex.getID());
			}

			JobSnapshottingSettings settings = new JobSnapshottingSettings(
					triggerVertices, ackVertices, commitVertices, interval,
					cfg.getCheckpointTimeout(), cfg.getMinPauseBetweenCheckpoints(),
					cfg.getMaxConcurrentCheckpoints());
			jobGraph.setSnapshotSettings(settings);

			// check if a restart strategy has been set, if not then set the FixedDelayRestartStrategy
			if (streamGraph.getExecutionConfig().getRestartStrategy() == null) {
				// if the user enabled checkpointing, the default number of exec retries is infinite.
				streamGraph.getExecutionConfig().setRestartStrategy(
					RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, DEFAULT_RESTART_DELAY));
			}
		}
	}

	private void configureRestartStrategy() {
		jobGraph.setRestartStrategyConfiguration(streamGraph.getExecutionConfig().getRestartStrategy());
	}

	// ------------------------------------------------------------------------

	/**
	 * Returns a map with a hash for each {@link StreamNode} of the {@link
	 * StreamGraph}. The hash is used as the {@link JobVertexID} in order to
	 * identify nodes across job submissions if they didn't change.
	 *
	 * <p>The complete {@link StreamGraph} is traversed. The hash is either
	 * computed from the transformation's user-specified id (see
	 * {@link StreamTransformation#getUid()}) or generated in a deterministic way.
	 *
	 * <p>The generated hash is deterministic with respect to:
	 * <ul>
	 * <li>node-local properties (like parallelism, UDF, node ID),
	 * <li>chained output nodes, and
	 * <li>input nodes hashes
	 * </ul>
	 *
	 * @return A map from {@link StreamNode#id} to hash as 16-byte array.
	 */
	private Map<Integer, byte[]> traverseStreamGraphAndGenerateHashes() {
		// The hash function used to generate the hash
		final HashFunction hashFunction = Hashing.murmur3_128(0);
		final Map<Integer, byte[]> hashes = new HashMap<>();

		Set<Integer> visited = new HashSet<>();
		Queue<StreamNode> remaining = new ArrayDeque<>();

		// We need to make the source order deterministic. The source IDs are
		// not returned in the same order, which means that submitting the same
		// program twice might result in different traversal, which breaks the
		// deterministic hash assignment.
		List<Integer> sources = new ArrayList<>();
		for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
			sources.add(sourceNodeId);
		}
		Collections.sort(sources);

		//
		// Traverse the graph in a breadth-first manner. Keep in mind that
		// the graph is not a tree and multiple paths to nodes can exist.
		//

		// Start with source nodes
		for (Integer sourceNodeId : sources) {
			remaining.add(streamGraph.getStreamNode(sourceNodeId));
			visited.add(sourceNodeId);
		}

		StreamNode currentNode;
		while ((currentNode = remaining.poll()) != null) {
			// Generate the hash code. Because multiple paths exist to each
			// node, we might not have all required inputs available to
			// generate the hash code.
			if (generateNodeHash(currentNode, hashFunction, hashes)) {
				// Add the child nodes
				for (StreamEdge outEdge : currentNode.getOutEdges()) {
					StreamNode child = outEdge.getTargetVertex();

					if (!visited.contains(child.getId())) {
						remaining.add(child);
						visited.add(child.getId());
					}
				}
			}
			else {
				// We will revisit this later.
				visited.remove(currentNode.getId());
			}
		}

		return hashes;
	}

	/**
	 * Generates a hash for the node and returns whether the operation was
	 * successful.
	 *
	 * @param node         The node to generate the hash for
	 * @param hashFunction The hash function to use
	 * @param hashes       The current state of generated hashes
	 * @return <code>true</code> if the node hash has been generated.
	 * <code>false</code>, otherwise. If the operation is not successful, the
	 * hash needs to be generated at a later point when all input is available.
	 * @throws IllegalStateException If node has user-specified hash and is
	 *                               intermediate node of a chain
	 */
	private boolean generateNodeHash(
			StreamNode node,
			HashFunction hashFunction,
			Map<Integer, byte[]> hashes) {

		// Check for user-specified ID
		String userSpecifiedHash = node.getTransformationId();

		if (userSpecifiedHash == null) {
			// Check that all input nodes have their hashes computed
			for (StreamEdge inEdge : node.getInEdges()) {
				// If the input node has not been visited yet, the current
				// node will be visited again at a later point when all input
				// nodes have been visited and their hashes set.
				if (!hashes.containsKey(inEdge.getSourceId())) {
					return false;
				}
			}

			Hasher hasher = hashFunction.newHasher();
			byte[] hash = generateDeterministicHash(node, hasher, hashes);

			if (hashes.put(node.getId(), hash) != null) {
				// Sanity check
				throw new IllegalStateException("Unexpected state. Tried to add node hash " +
						"twice. This is probably a bug in the JobGraph generator.");
			}

			return true;
		}
		else {
			// Check that this node is not part of a chain. This is currently
			// not supported, because the runtime takes the snapshots by the
			// operator ID of the first vertex in a chain. It's OK if the node
			// has chained outputs.
			for (StreamEdge inEdge : node.getInEdges()) {
				if (isChainable(inEdge)) {
					throw new UnsupportedOperationException("Cannot assign user-specified hash "
							+ "to intermediate node in chain. This will be supported in future "
							+ "versions of Flink. As a work around start new chain at task "
							+ node.getOperatorName() + ".");
				}
			}

			Hasher hasher = hashFunction.newHasher();
			byte[] hash = generateUserSpecifiedHash(node, hasher);

			for (byte[] previousHash : hashes.values()) {
				if (Arrays.equals(previousHash, hash)) {
					throw new IllegalArgumentException("Hash collision on user-specified ID. " +
							"Most likely cause is a non-unique ID. Please check that all IDs " +
							"specified via `uid(String)` are unique.");
				}
			}

			if (hashes.put(node.getId(), hash) != null) {
				// Sanity check
				throw new IllegalStateException("Unexpected state. Tried to add node hash " +
						"twice. This is probably a bug in the JobGraph generator.");
			}

			return true;
		}
	}

	/**
	 * Generates a hash from a user-specified ID.
	 */
	private byte[] generateUserSpecifiedHash(StreamNode node, Hasher hasher) {
		hasher.putString(node.getTransformationId(), Charset.forName("UTF-8"));

		return hasher.hash().asBytes();
	}

	/**
	 * Generates a deterministic hash from node-local properties and input and
	 * output edges.
	 */
	private byte[] generateDeterministicHash(
			StreamNode node,
			Hasher hasher,
			Map<Integer, byte[]> hashes) {

		// Include stream node to hash. We use the current size of the computed
		// hashes as the ID. We cannot use the node's ID, because it is
		// assigned from a static counter. This will result in two identical
		// programs having different hashes.
		generateNodeLocalHash(node, hasher, hashes.size());

		// Include chained nodes to hash
		for (StreamEdge outEdge : node.getOutEdges()) {
			if (isChainable(outEdge)) {
				StreamNode chainedNode = outEdge.getTargetVertex();

				// Use the hash size again, because the nodes are chained to
				// this node. This does not add a hash for the chained nodes.
				generateNodeLocalHash(chainedNode, hasher, hashes.size());
			}
		}

		byte[] hash = hasher.hash().asBytes();

		// Make sure that all input nodes have their hash set before entering
		// this loop (calling this method).
		for (StreamEdge inEdge : node.getInEdges()) {
			byte[] otherHash = hashes.get(inEdge.getSourceId());

			// Sanity check
			if (otherHash == null) {
				throw new IllegalStateException("Missing hash for input node "
						+ inEdge.getSourceVertex() + ". Cannot generate hash for "
						+ node + ".");
			}

			for (int j = 0; j < hash.length; j++) {
				hash[j] = (byte) (hash[j] * 37 ^ otherHash[j]);
			}
		}

		if (LOG.isDebugEnabled()) {
			String udfClassName = "";
			if (node.getOperator() instanceof AbstractUdfStreamOperator) {
				udfClassName = ((AbstractUdfStreamOperator<?, ?>) node.getOperator())
						.getUserFunction().getClass().getName();
			}

			LOG.debug("Generated hash '" + byteToHexString(hash) + "' for node " +
					"'" + node.toString() + "' {id: " + node.getId() + ", " +
					"parallelism: " + node.getParallelism() + ", " +
					"user function: " + udfClassName + "}");
		}

		return hash;
	}

	/**
	 * Applies the {@link Hasher} to the {@link StreamNode} (only node local
	 * attributes are taken into account). The hasher encapsulates the current
	 * state of the hash.
	 *
	 * <p>The specified ID is local to this node. We cannot use the
	 * {@link StreamNode#id}, because it is incremented in a static counter.
	 * Therefore, the IDs for identical jobs will otherwise be different.
	 */
	private void generateNodeLocalHash(StreamNode node, Hasher hasher, int id) {
		// This resolves conflicts for otherwise identical source nodes. BUT
		// the generated hash codes depend on the ordering of the nodes in the
		// stream graph.
		hasher.putInt(id);

		hasher.putInt(node.getParallelism());

		if (node.getOperator() instanceof AbstractUdfStreamOperator) {
			String udfClassName = ((AbstractUdfStreamOperator<?, ?>) node.getOperator())
					.getUserFunction().getClass().getName();

			hasher.putString(udfClassName, Charset.forName("UTF-8"));
		}
	}
}
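
That completes the picture: the StreamGraph is the logical plan, and createJobGraph() turns it into the JobGraph that is actually submitted to the cluster. Below is a minimal sketch under the same Flink 1.x assumptions (StreamingJobGraphGenerator is marked @Internal; in these versions StreamGraph.getJobGraph() is a thin wrapper around new StreamingJobGraphGenerator(this).createJobGraph()); the doubling MapFunction is only illustrative.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.graph.StreamGraph;
import org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator;

public class JobGraphSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		env.fromElements(1, 2, 3)
				.map(new MapFunction<Integer, Integer>() {
					@Override
					public Integer map(Integer value) {
						return value * 2;
					}
				})
				.print();

		// the logical plan, produced by StreamGraphGenerator
		StreamGraph streamGraph = env.getStreamGraph();

		// the physical plan: deterministic hashes -> chaining -> physical edges
		// -> slot sharing -> checkpoint settings -> restart strategy, as in createJobGraph() above
		JobGraph jobGraph = new StreamingJobGraphGenerator(streamGraph).createJobGraph();

		for (JobVertex vertex : jobGraph.getVertices()) {
			System.out.println(vertex.getName() + " (parallelism " + vertex.getParallelism() + ")");
		}
	}
}

Depending on the configured parallelism, the map and the print sink will usually be chained into a single JobVertex because they satisfy isChainable() (ForwardPartitioner, same parallelism, same slot sharing group), while the non-parallel collection source may stay separate. The resulting JobGraph is what the Client/JobClient path finally submits to the cluster.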


