Commit 241c1ca4 authored by Till Rohrmann

Replaced the JobClient by an actor.

Parent ac94253f
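The commit replaces the old blocking JobClient class with an Akka actor plus static helpers on its companion object. The following is a minimal sketch of the new client-side call sequence as it appears in this diff (startActorSystemAndActor, uploadJarFiles, submitJobAndWait, mirroring Client.run() and LocalExecutor); the wrapper class and method names are hypothetical, and the JobGraph, Configuration, and job manager hostname are assumed to be prepared by the caller.

// Sketch only, not part of the commit: reassembles the calls introduced below.
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.jobgraph.JobGraph;
import scala.Tuple2;

public class ActorJobSubmissionSketch {

    public static JobExecutionResult submit(JobGraph jobGraph, Configuration config,
            String jobManagerHost) throws Exception {
        // Start a local actor system plus the JobClient actor that talks to the JobManager.
        Tuple2<ActorSystem, ActorRef> pair = JobClient.startActorSystemAndActor(config);
        ActorRef jobClient = pair._2();
        try {
            // Ship the user-code jars to the job manager's BLOB server before submission.
            JobClient.uploadJarFiles(jobGraph, jobManagerHost, jobClient);
            // Submit and block until the job finishes; returns runtime and accumulator results.
            return JobClient.submitJobAndWait(jobGraph, true, jobClient);
        } finally {
            // The client-side actor system must be torn down explicitly.
            pair._1().shutdown();
            pair._1().awaitTermination();
        }
    }
}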
......@@ -20,6 +20,8 @@
package org.apache.flink.hadoopcompatibility.mapred.wrapper;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.core.memory.DataInputView;
......@@ -87,6 +89,30 @@ public class HadoopInputSplit implements InputSplit {
}
private void writeObject(ObjectOutputStream out) throws IOException {
out.writeInt(splitNumber);
out.writeUTF(hadoopInputSplitTypeName);
hadoopInputSplit.write(out);
}
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
this.splitNumber=in.readInt();
this.hadoopInputSplitTypeName = in.readUTF();
if(hadoopInputSplit == null) {
try {
Class<? extends org.apache.hadoop.io.Writable> inputSplit =
Class.forName(hadoopInputSplitTypeName).asSubclass(org.apache.hadoop.io.Writable.class);
this.hadoopInputSplit = (org.apache.hadoop.mapred.InputSplit) WritableFactories.newInstance( inputSplit );
}
catch (Exception e) {
throw new RuntimeException("Unable to create InputSplit", e);
}
}
this.hadoopInputSplit.readFields(in);
}
@Override
public int getSplitNumber() {
return this.splitNumber;
......
......@@ -20,6 +20,8 @@
package org.apache.flink.hadoopcompatibility.mapreduce.wrapper;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import org.apache.flink.core.io.InputSplit;
import org.apache.flink.core.memory.DataInputView;
......@@ -78,6 +80,31 @@ public class HadoopInputSplit implements InputSplit {
}
((Writable)this.mapreduceInputSplit).readFields(in);
}
private void writeObject(ObjectOutputStream out) throws IOException {
out.writeInt(this.splitNumber);
out.writeUTF(this.mapreduceInputSplit.getClass().getName());
Writable w = (Writable) this.mapreduceInputSplit;
w.write(out);
}
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
this.splitNumber=in.readInt();
String className = in.readUTF();
if(this.mapreduceInputSplit == null) {
try {
Class<? extends org.apache.hadoop.io.Writable> inputSplit =
Class.forName(className).asSubclass(org.apache.hadoop.io.Writable.class);
this.mapreduceInputSplit = (org.apache.hadoop.mapreduce.InputSplit) WritableFactories.newInstance(inputSplit);
} catch (Exception e) {
throw new RuntimeException("Unable to create InputSplit", e);
}
}
((Writable)this.mapreduceInputSplit).readFields(in);
}
@Override
public int getSplitNumber() {
......
......@@ -35,6 +35,7 @@ import java.util.Map;
import java.util.Properties;
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
......@@ -54,12 +55,13 @@ import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.GlobalConfiguration;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.event.job.RecentJobEvent;
import org.apache.flink.runtime.jobmanager.RunningJob;
import org.apache.flink.runtime.jobgraph.JobID;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobmanager.JobManager;
import org.apache.flink.runtime.messages.EventCollectorMessages;
import org.apache.flink.runtime.messages.JobManagerMessages;
import org.apache.flink.runtime.messages.JobManagerMessages.CancelJob;
import org.apache.flink.runtime.messages.JobManagerMessages.RequestRunningJobs$;
import org.apache.flink.runtime.messages.JobManagerMessages.RunningJobsResponse;
import org.apache.flink.util.StringUtils;
/**
......@@ -508,34 +510,34 @@ public class CliFrontend {
return 1;
}
List<RecentJobEvent> recentJobs = AkkaUtils.<EventCollectorMessages.RecentJobs>ask(jobManager,
EventCollectorMessages.RequestRecentJobEvents$.MODULE$).asJavaList();
List<RunningJob> jobs = AkkaUtils.<RunningJobsResponse>ask(jobManager,
RequestRunningJobs$.MODULE$).asJavaList();
ArrayList<RecentJobEvent> runningJobs = null;
ArrayList<RecentJobEvent> scheduledJobs = null;
ArrayList<RunningJob> runningJobs = null;
ArrayList<RunningJob> scheduledJobs = null;
if (running) {
runningJobs = new ArrayList<RecentJobEvent>();
runningJobs = new ArrayList<RunningJob>();
}
if (scheduled) {
scheduledJobs = new ArrayList<RecentJobEvent>();
scheduledJobs = new ArrayList<RunningJob>();
}
for (RecentJobEvent rje : recentJobs) {
for (RunningJob rj : jobs) {
if (running && rje.getJobStatus().equals(JobStatus.RUNNING)) {
runningJobs.add(rje);
if (running && rj.jobStatus().equals(JobStatus.RUNNING)) {
runningJobs.add(rj);
}
if (scheduled && rje.getJobStatus().equals(JobStatus.CREATED)) {
scheduledJobs.add(rje);
if (scheduled && rj.jobStatus().equals(JobStatus.CREATED)) {
scheduledJobs.add(rj);
}
}
SimpleDateFormat df = new SimpleDateFormat("dd.MM.yyyy HH:mm:ss");
Comparator<RecentJobEvent> njec = new Comparator<RecentJobEvent>(){
Comparator<RunningJob> njec = new Comparator<RunningJob>(){
@Override
public int compare(RecentJobEvent o1, RecentJobEvent o2) {
return (int)(o1.getTimestamp()-o2.getTimestamp());
public int compare(RunningJob o1, RunningJob o2) {
return (int)(o1.timestamp()-o2.timestamp());
}
};
......@@ -546,8 +548,9 @@ public class CliFrontend {
Collections.sort(runningJobs, njec);
System.out.println("------------------------ Running Jobs ------------------------");
for(RecentJobEvent je : runningJobs) {
System.out.println(df.format(new Date(je.getTimestamp()))+" : "+je.getJobID().toString()+" : "+je.getJobName());
for(RunningJob rj : runningJobs) {
System.out.println(df.format(new Date(rj.timestamp()))+" : "+rj
.jobID().toString()+" : "+rj.jobName());
}
System.out.println("--------------------------------------------------------------");
}
......@@ -559,8 +562,9 @@ public class CliFrontend {
Collections.sort(scheduledJobs, njec);
System.out.println("----------------------- Scheduled Jobs -----------------------");
for(RecentJobEvent je : scheduledJobs) {
System.out.println(df.format(new Date(je.getTimestamp()))+" : "+je.getJobID().toString()+" : "+je.getJobName());
for(RunningJob rj : scheduledJobs) {
System.out.println(df.format(new Date(rj.timestamp()))+" : "+rj.jobID()
.toString()+" : "+rj.jobName());
}
System.out.println("--------------------------------------------------------------");
}
......@@ -627,7 +631,7 @@ public class CliFrontend {
return 1;
}
AkkaUtils.ask(jobManager, new JobManagerMessages.CancelJob(jobId));
AkkaUtils.ask(jobManager, new CancelJob(jobId));
return 0;
}
catch (Throwable t) {
......@@ -750,7 +754,8 @@ public class CliFrontend {
return null;
}
return JobManager.getJobManager(jobManagerAddress);
return JobManager.getJobManager(jobManagerAddress,
ActorSystem.create("CliFrontendActorSystem", AkkaUtils.getDefaultActorSystemConfig()));
}
......
......@@ -21,11 +21,13 @@ package org.apache.flink.client;
import java.util.List;
import akka.actor.ActorRef;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.PlanExecutor;
import org.apache.flink.api.common.Program;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.jobgraph.JobGraph;
......@@ -54,22 +56,13 @@ public class LocalExecutor extends PlanExecutor {
// ---------------------------------- config options ------------------------------------------
private int jobManagerRpcPort = -1;
private int taskManagerRpcPort = -1;
private int taskManagerDataPort = -1;
private int taskManagerNumSlots = DEFAULT_TASK_MANAGER_NUM_SLOTS;
private String configDir;
private String hdfsConfigFile;
private boolean defaultOverwriteFiles = DEFAULT_OVERWRITE;
private boolean defaultAlwaysCreateDirectory = false;
// --------------------------------------------------------------------------------------------
public LocalExecutor() {
......@@ -78,45 +71,7 @@ public class LocalExecutor extends PlanExecutor {
}
}
public int getJobManagerRpcPort() {
return jobManagerRpcPort;
}
public void setJobManagerRpcPort(int jobManagerRpcPort) {
this.jobManagerRpcPort = jobManagerRpcPort;
}
public int getTaskManagerRpcPort() {
return taskManagerRpcPort;
}
public void setTaskManagerRpcPort(int taskManagerRpcPort) {
this.taskManagerRpcPort = taskManagerRpcPort;
}
public int getTaskManagerDataPort() {
return taskManagerDataPort;
}
public void setTaskManagerDataPort(int taskManagerDataPort) {
this.taskManagerDataPort = taskManagerDataPort;
}
public String getConfigDir() {
return configDir;
}
public void setConfigDir(String configDir) {
this.configDir = configDir;
}
public String getHdfsConfig() {
return hdfsConfigFile;
}
public void setHdfsConfig(String hdfsConfig) {
this.hdfsConfigFile = hdfsConfig;
}
public boolean isDefaultOverwriteFiles() {
return defaultOverwriteFiles;
......@@ -126,14 +81,6 @@ public class LocalExecutor extends PlanExecutor {
this.defaultOverwriteFiles = defaultOverwriteFiles;
}
public boolean isDefaultAlwaysCreateDirectory() {
return defaultAlwaysCreateDirectory;
}
public void setDefaultAlwaysCreateDirectory(boolean defaultAlwaysCreateDirectory) {
this.defaultAlwaysCreateDirectory = defaultAlwaysCreateDirectory;
}
public void setTaskManagerNumSlots(int taskManagerNumSlots) { this.taskManagerNumSlots = taskManagerNumSlots; }
public int getTaskManagerNumSlots() { return this.taskManagerNumSlots; }
......@@ -147,7 +94,8 @@ public class LocalExecutor extends PlanExecutor {
// create the embedded runtime
this.flink = new LocalFlinkMiniCluster(configDir);
Configuration configuration = new Configuration();
configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, getTaskManagerNumSlots());
configuration.setBoolean(ConfigConstants.FILESYSTEM_DEFAULT_OVERWRITE_KEY, isDefaultOverwriteFiles());
// start it up
this.flink.start(configuration);
} else {
......@@ -216,10 +164,10 @@ public class LocalExecutor extends PlanExecutor {
NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
JobGraph jobGraph = jgg.compileJobGraph(op);
JobClient jobClient = this.flink.getJobClient(jobGraph);
JobExecutionResult result = jobClient.submitJobAndWait();
return result;
ActorRef jobClient = flink.getJobClient();
return JobClient.submitJobAndWait(jobGraph, true, jobClient);
}
finally {
if (shutDownAtEnd) {
......
......@@ -25,6 +25,10 @@ import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.util.List;
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import org.apache.flink.runtime.messages.JobManagerMessages.SubmissionFailure;
import org.apache.flink.runtime.messages.JobManagerMessages.SubmissionResponse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.flink.api.common.JobExecutionResult;
......@@ -44,8 +48,6 @@ import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.client.JobExecutionException;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.messages.JobResult;
import org.apache.flink.runtime.messages.JobResult.JobSubmissionResult;
import com.google.common.base.Preconditions;
......@@ -64,8 +66,6 @@ public class Client {
private final PactCompiler compiler; // the compiler to compile the jobs
private final ClassLoader userCodeClassLoader;
private boolean printStatusDuringExecution;
// ------------------------------------------------------------------------
......@@ -81,10 +81,10 @@ public class Client {
public Client(InetSocketAddress jobManagerAddress, Configuration config, ClassLoader userCodeClassLoader) {
Preconditions.checkNotNull(config, "Configuration is null");
this.configuration = config;
configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, jobManagerAddress.getAddress().getHostAddress());
configuration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY,
jobManagerAddress.getAddress().getCanonicalHostName());
configuration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerAddress.getPort());
this.userCodeClassLoader = userCodeClassLoader;
this.compiler = new PactCompiler(new DataStatistics(), new DefaultCostEstimator());
}
......@@ -109,7 +109,6 @@ public class Client {
throw new CompilerException("Cannot find port to job manager's RPC service in the global configuration.");
}
this.userCodeClassLoader = userCodeClassLoader;
this.compiler = new PactCompiler(new DataStatistics(), new DefaultCostEstimator());
}
......@@ -295,39 +294,50 @@ public class Client {
}
public JobExecutionResult run(JobGraph jobGraph, boolean wait) throws ProgramInvocationException {
JobClient client;
try {
client = new JobClient(jobGraph, configuration, this.userCodeClassLoader);
Tuple2<ActorSystem, ActorRef> pair = JobClient.startActorSystemAndActor(configuration);
ActorRef client = pair._2();
String hostname = configuration.getString(ConfigConstants
.JOB_MANAGER_IPC_ADDRESS_KEY, null);
if(hostname == null){
throw new ProgramInvocationException("Could not find hostname of job manager.");
}
catch (IOException e) {
throw new ProgramInvocationException("Could not open job manager: " + e.getMessage());
try {
JobClient.uploadJarFiles(jobGraph, hostname, client);
}catch(IOException e){
throw new ProgramInvocationException("Could not upload blobs.", e);
}
client.setConsoleStreamForReporting(this.printStatusDuringExecution ? System.out : null);
try {
if (wait) {
return client.submitJobAndWait();
return JobClient.submitJobAndWait(jobGraph, printStatusDuringExecution, client);
}
else {
JobSubmissionResult result = client.submitJob();
if (result.returnCode() != JobResult.SUCCESS()) {
throw new ProgramInvocationException("The job was not successfully submitted to the nephele job manager"
+ (result.description() == null ? "." : ": " + result.description()));
SubmissionResponse response =JobClient.submitJobDetached(jobGraph,
printStatusDuringExecution, client);
if(response instanceof SubmissionFailure){
SubmissionFailure failure = (SubmissionFailure) response;
throw new ProgramInvocationException("The job was not successfully submitted " +
"to the flink job manager", failure.cause());
}
}
}
catch (IOException e) {
throw new ProgramInvocationException("Could not submit job to job manager: " + e.getMessage());
}
catch (JobExecutionException jex) {
if(jex.isJobCanceledByUser()) {
throw new ProgramInvocationException("The program has been canceled");
} else {
throw new ProgramInvocationException("The program execution failed: " + jex.getMessage());
}
}finally{
pair._1().shutdown();
pair._1().awaitTermination();
}
return new JobExecutionResult(-1, null);
}
......
......@@ -31,12 +31,12 @@ import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.event.job.RecentJobEvent;
import org.apache.flink.runtime.jobmanager.JobManager;
import org.apache.flink.runtime.messages.EventCollectorMessages;
public class JobsInfoServlet extends HttpServlet {
......@@ -48,9 +48,13 @@ public class JobsInfoServlet extends HttpServlet {
// ------------------------------------------------------------------------
private final Configuration config;
private final ActorSystem system;
public JobsInfoServlet(Configuration nepheleConfig) {
this.config = nepheleConfig;
system = ActorSystem.create("JobsInfoServletActorSystem",
AkkaUtils.getDefaultActorSystemConfig());
}
@Override
......@@ -62,10 +66,10 @@ public class JobsInfoServlet extends HttpServlet {
int jmPort = config.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY,
ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT);
ActorRef jm = JobManager.getJobManager(new InetSocketAddress(jmHost, jmPort));
ActorRef jm = JobManager.getJobManager(new InetSocketAddress(jmHost, jmPort), system);
List<RecentJobEvent> recentJobs = AkkaUtils.<EventCollectorMessages.RecentJobs>ask(jm,
EventCollectorMessages.RequestRecentJobEvents$.MODULE$).asJavaList();
// TODO: fix
List<RecentJobEvent> recentJobs = null;
ArrayList<RecentJobEvent> jobs = new ArrayList<RecentJobEvent>(recentJobs);
......
......@@ -25,19 +25,17 @@ import static org.junit.Assert.fail;
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import akka.actor.Props;
import akka.actor.Status;
import akka.actor.UntypedActor;
import akka.testkit.JavaTestKit;
import org.apache.commons.cli.CommandLine;
import org.apache.flink.runtime.event.job.RecentJobEvent;
import org.apache.flink.runtime.jobmanager.RunningJob;
import org.apache.flink.runtime.jobgraph.JobID;
import org.apache.flink.runtime.messages.EventCollectorMessages;
import org.apache.flink.runtime.messages.JobManagerMessages;
import org.apache.flink.runtime.messages.JobResult;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.ArrayList;
import scala.collection.Seq;
//TODO: Update test case
public class CliFrontendListCancelTest {
......@@ -162,12 +160,13 @@ public class CliFrontendListCancelTest {
public void onReceive(Object message) throws Exception {
if(message instanceof JobManagerMessages.RequestAvailableSlots$){
getSender().tell(1, getSelf());
}else if(message instanceof EventCollectorMessages.RequestRecentJobEvents$) {
getSender().tell(new EventCollectorMessages.RecentJobs(new ArrayList<RecentJobEvent>()), getSelf());
}else if(message instanceof JobManagerMessages.CancelJob){
JobManagerMessages.CancelJob cancelJob = (JobManagerMessages.CancelJob) message;
assertEquals(jobID, cancelJob.jobID());
getSender().tell(new JobResult.JobCancelResult(JobResult.SUCCESS(), null), getSelf());
getSender().tell(new Status.Success(new Object()), getSelf());
}else if(message instanceof JobManagerMessages.RequestRunningJobs$){
getSender().tell(new JobManagerMessages.RunningJobsResponse(),
getSelf());
}
}
}
......
......@@ -18,6 +18,8 @@
package org.apache.flink.client.program;
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.Plan;
......@@ -28,10 +30,9 @@ import org.apache.flink.compiler.plan.OptimizedPlan;
import org.apache.flink.compiler.plantranslate.NepheleJobGraphGenerator;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.client.JobClient$;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.messages.JobResult;
import org.apache.flink.runtime.messages.JobResult.JobSubmissionResult;
import org.apache.flink.runtime.messages.JobManagerMessages;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
......@@ -40,6 +41,8 @@ import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.junit4.PowerMockRunner;
import org.powermock.reflect.Whitebox;
import scala.Tuple2;
import java.io.IOException;
......@@ -57,7 +60,7 @@ import static org.powermock.api.mockito.PowerMockito.whenNew;
* Simple and maybe stupid test to check the {@link Client} class.
*/
@RunWith(PowerMockRunner.class)
@PrepareForTest(Client.class)
@PrepareForTest({Client.class, JobClient$.class})
public class ClientTest {
@Mock Configuration configMock;
......@@ -68,8 +71,11 @@ public class ClientTest {
@Mock OptimizedPlan optimizedPlanMock;
@Mock NepheleJobGraphGenerator generatorMock;
@Mock JobGraph jobGraphMock;
@Mock JobClient jobClientMock;
@Mock JobSubmissionResult jobSubmissionResultMock;
@Mock ActorSystem mockSystem;
@Mock JobClient$ mockJobClient;
@Mock JobManagerMessages.SubmissionSuccess mockSubmissionSuccess;
@Mock JobManagerMessages.SubmissionFailure mockSubmissionFailure;
@Mock ActorRef mockJobClientActor;
@Before
public void setUp() throws Exception {
......@@ -90,14 +96,16 @@ public class ClientTest {
whenNew(NepheleJobGraphGenerator.class).withNoArguments().thenReturn(generatorMock);
when(generatorMock.compileJobGraph(optimizedPlanMock)).thenReturn(jobGraphMock);
whenNew(JobClient.class).withArguments(any(JobGraph.class), any(Configuration.class), any(ClassLoader.class)).thenReturn(this.jobClientMock);
Whitebox.setInternalState(JobClient$.class, mockJobClient);
when(this.jobClientMock.submitJob()).thenReturn(jobSubmissionResultMock);
when(mockJobClient.startActorSystemAndActor(configMock)).thenReturn(new Tuple2<ActorSystem,
ActorRef>(mockSystem, mockJobClientActor));
}
@Test
public void shouldSubmitToJobClient() throws ProgramInvocationException, IOException {
when(jobSubmissionResultMock.returnCode()).thenReturn(JobResult.SUCCESS());
when(mockJobClient.submitJobDetached(any(JobGraph.class), any(boolean.class),
any(ActorRef.class))).thenReturn(mockSubmissionSuccess);
Client out = new Client(configMock, getClass().getClassLoader());
out.run(program.getPlanWithJars(), -1, false);
......@@ -105,18 +113,16 @@ public class ClientTest {
verify(this.compilerMock, times(1)).compile(planMock);
verify(this.generatorMock, times(1)).compileJobGraph(optimizedPlanMock);
verify(this.jobClientMock, times(1)).submitJob();
}
@Test(expected = ProgramInvocationException.class)
public void shouldThrowException() throws Exception {
when(jobSubmissionResultMock.returnCode()).thenReturn(JobResult.ERROR());
when(mockJobClient.submitJobDetached(any(JobGraph.class), any(boolean.class),
any(ActorRef.class))).thenReturn(mockSubmissionFailure);
Client out = new Client(configMock, getClass().getClassLoader());
out.run(program.getPlanWithJars(), -1, false);
program.deleteExtractedLibraries();
verify(this.jobClientMock).submitJob();
}
@Test(expected = InvalidProgramException.class)
......
......@@ -22,10 +22,10 @@ package org.apache.flink.api.common;
import java.util.Map;
public class JobExecutionResult {
private long netRuntime;
private Map<String, Object> accumulatorResults;
public JobExecutionResult(long netRuntime, Map<String, Object> accumulators) {
this.netRuntime = netRuntime;
this.accumulatorResults = accumulators;
......
......@@ -41,7 +41,7 @@ import java.io.Serializable;
* Type of the accumulator result as it will be reported to the
* client
*/
public interface Accumulator<V, R> extends Serializable, Cloneable{
public interface Accumulator<V, R extends Serializable> extends Serializable, Cloneable{
/**
* @param value
......
......@@ -19,6 +19,7 @@
package org.apache.flink.api.common.accumulators;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
......@@ -53,7 +54,8 @@ public class AccumulatorHelper {
/**
* Workaround method for type safety
*/
private static final <V, R> void mergeSingle(Accumulator<?, ?> target, Accumulator<?, ?> toMerge) {
private static final <V, R extends Serializable> void mergeSingle(Accumulator<?, ?> target,
Accumulator<?, ?> toMerge) {
@SuppressWarnings("unchecked")
Accumulator<V, R> typedTarget = (Accumulator<V, R>) target;
......
......@@ -33,11 +33,11 @@ import java.util.TreeMap;
* Could be extended to continuous values later, but then we need to dynamically
* decide about the bin size in an online algorithm (or ask the user)
*/
public class Histogram implements Accumulator<Integer, Map<Integer, Integer>> {
public class Histogram implements Accumulator<Integer, TreeMap<Integer, Integer>> {
private static final long serialVersionUID = 1L;
private Map<Integer, Integer> treeMap = new TreeMap<Integer, Integer>();
private TreeMap<Integer, Integer> treeMap = new TreeMap<Integer, Integer>();
@Override
public void add(Integer value) {
......@@ -47,12 +47,12 @@ public class Histogram implements Accumulator<Integer, Map<Integer, Integer>> {
}
@Override
public Map<Integer, Integer> getLocalValue() {
public TreeMap<Integer, Integer> getLocalValue() {
return this.treeMap;
}
@Override
public void merge(Accumulator<Integer, Map<Integer, Integer>> other) {
public void merge(Accumulator<Integer, TreeMap<Integer, Integer>> other) {
// Merge the values into this map
for (Map.Entry<Integer, Integer> entryFromOther : ((Histogram) other).getLocalValue()
.entrySet()) {
......@@ -93,7 +93,7 @@ public class Histogram implements Accumulator<Integer, Map<Integer, Integer>> {
}
@Override
public Accumulator<Integer, Map<Integer, Integer>> clone() {
public Accumulator<Integer, TreeMap<Integer, Integer>> clone() {
Histogram result = new Histogram();
result.treeMap = new TreeMap<Integer, Integer>(treeMap);
......
......@@ -19,9 +19,10 @@
package org.apache.flink.api.common.accumulators;
import java.io.Serializable;
/**
* Similar to Accumulator, but the type of items to add and the result value
* must be the same.
*/
public interface SimpleAccumulator<T> extends Accumulator<T,T> {
}
public interface SimpleAccumulator<T extends Serializable> extends Accumulator<T,T> {}
......@@ -18,6 +18,7 @@
package org.apache.flink.api.common.functions;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
......@@ -77,7 +78,7 @@ public interface RuntimeContext {
* This is only needed to support generic accumulators (e.g. for
* Set<String>). Didn't find a way to get this work with getAccumulator.
*/
<V, A> void addAccumulator(String name, Accumulator<V, A> accumulator);
<V, A extends Serializable> void addAccumulator(String name, Accumulator<V, A> accumulator);
/**
* Get an existing accumulator object. The accumulator must have been added
......@@ -86,7 +87,7 @@ public interface RuntimeContext {
* Throws an exception if the accumulator does not exist or if the
* accumulator exists, but with different type.
*/
<V, A> Accumulator<V, A> getAccumulator(String name);
<V, A extends Serializable> Accumulator<V, A> getAccumulator(String name);
/**
* For system internal usage only. Use getAccumulator(...) to obtain a
......
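The RuntimeContext hunk above tightens the accumulator API so that the result type parameter must be Serializable. A minimal usage sketch, assuming a caller that already holds a RuntimeContext (the class and method names here are hypothetical); it only uses addAccumulator, getAccumulator, and the Histogram type shown elsewhere in this diff.

// Sketch only, not part of the commit.
import java.util.TreeMap;

import org.apache.flink.api.common.accumulators.Accumulator;
import org.apache.flink.api.common.accumulators.Histogram;
import org.apache.flink.api.common.functions.RuntimeContext;

public class AccumulatorUsageSketch {

    private static final String ACC_NAME = "value-length-histogram";

    // Register the accumulator once, e.g. from a rich function's open() method.
    public static void register(RuntimeContext ctx) {
        ctx.addAccumulator(ACC_NAME, new Histogram());
    }

    // Per-record update; after this commit the result type must be Serializable,
    // which is why Histogram now exposes TreeMap instead of Map.
    public static void recordLength(RuntimeContext ctx, String value) {
        Accumulator<Integer, TreeMap<Integer, Integer>> histogram = ctx.getAccumulator(ACC_NAME);
        histogram.add(value.length());
    }
}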
......@@ -62,20 +62,22 @@ public abstract class FileOutputFormat<IT> implements OutputFormat<IT>, Initiali
private static OutputDirectoryMode DEFAULT_OUTPUT_DIRECTORY_MODE;
private static final void initDefaultsFromConfiguration() {
final boolean overwrite = GlobalConfiguration.getBoolean(ConfigConstants.FILESYSTEM_DEFAULT_OVERWRITE_KEY,
ConfigConstants.DEFAULT_FILESYSTEM_OVERWRITE);
private static final void initDefaultsFromConfiguration(Configuration configuration) {
final boolean overwrite = configuration.getBoolean(ConfigConstants
.FILESYSTEM_DEFAULT_OVERWRITE_KEY,
ConfigConstants.DEFAULT_FILESYSTEM_OVERWRITE);
DEFAULT_WRITE_MODE = overwrite ? WriteMode.OVERWRITE : WriteMode.NO_OVERWRITE;
final boolean alwaysCreateDirectory = GlobalConfiguration.getBoolean(ConfigConstants.FILESYSTEM_OUTPUT_ALWAYS_CREATE_DIRECTORY_KEY,
final boolean alwaysCreateDirectory = configuration.getBoolean(ConfigConstants
.FILESYSTEM_OUTPUT_ALWAYS_CREATE_DIRECTORY_KEY,
ConfigConstants.DEFAULT_FILESYSTEM_ALWAYS_CREATE_DIRECTORY);
DEFAULT_OUTPUT_DIRECTORY_MODE = alwaysCreateDirectory ? OutputDirectoryMode.ALWAYS : OutputDirectoryMode.PARONLY;
}
static {
initDefaultsFromConfiguration();
initDefaultsFromConfiguration(GlobalConfiguration.getConfiguration());
}
// --------------------------------------------------------------------------------------------
......
......@@ -189,16 +189,16 @@ public class EmptyFieldsCountAccumulator {
* increase the <i>n</i>-th vector component by 1, where <i>n</i> is the method's parameter. The size of the vector
* is automatically managed.
*/
public static class VectorAccumulator implements Accumulator<Integer, List<Integer>> {
public static class VectorAccumulator implements Accumulator<Integer, ArrayList<Integer>> {
/** Stores the accumulated vector components. */
private final List<Integer> resultVector;
private final ArrayList<Integer> resultVector;
public VectorAccumulator(){
this(new ArrayList<Integer>());
}
public VectorAccumulator(List<Integer> resultVector){
public VectorAccumulator(ArrayList<Integer> resultVector){
this.resultVector = resultVector;
}
......@@ -225,7 +225,7 @@ public class EmptyFieldsCountAccumulator {
}
@Override
public List<Integer> getLocalValue() {
public ArrayList<Integer> getLocalValue() {
return this.resultVector;
}
......@@ -236,7 +236,7 @@ public class EmptyFieldsCountAccumulator {
}
@Override
public void merge(final Accumulator<Integer, List<Integer>> other) {
public void merge(final Accumulator<Integer, ArrayList<Integer>> other) {
// merge two vector accumulators by adding up their vector components
final List<Integer> otherVector = other.getLocalValue();
for (int index = 0; index < otherVector.size(); index++) {
......@@ -265,7 +265,7 @@ public class EmptyFieldsCountAccumulator {
}
@Override
public Accumulator<Integer, List<Integer>> clone() {
public Accumulator<Integer, ArrayList<Integer>> clone() {
VectorAccumulator result = new VectorAccumulator(new ArrayList<Integer>(resultVector));
return result;
......
......@@ -23,17 +23,14 @@ import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.security.MessageDigest;
import java.util.Arrays;
import org.apache.flink.core.io.IOReadableWritable;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
/**
* A BLOB key uniquely identifies a BLOB.
*/
public final class BlobKey implements IOReadableWritable, Comparable<BlobKey> {
public final class BlobKey implements Serializable, Comparable<BlobKey> {
/**
* Array of hex characters to facilitate fast toString() method.
......@@ -72,22 +69,6 @@ public final class BlobKey implements IOReadableWritable, Comparable<BlobKey> {
this.key = key;
}
/**
* {@inheritDoc}
*/
@Override
public void write(final DataOutputView out) throws IOException {
out.write(this.key);
}
/**
* {@inheritDoc}
*/
@Override
public void read(final DataInputView in) throws IOException {
in.readFully(this.key);
}
/**
* {@inheritDoc}
*/
......
......@@ -24,7 +24,20 @@ import java.net.InetSocketAddress;
import java.util.Iterator;
import java.util.Map;
import akka.actor.ActorPath;
import akka.actor.ActorRef;
import akka.actor.ActorRefProvider;
import akka.actor.ActorSystem;
import akka.actor.ActorSystemImpl;
import akka.actor.Extension;
import akka.actor.ExtensionId;
import akka.actor.InternalActorRef;
import akka.actor.Props;
import akka.actor.Scheduler;
import akka.dispatch.Dispatchers;
import akka.dispatch.Mailboxes;
import akka.event.EventStream;
import akka.event.LoggingAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.flink.api.common.JobExecutionResult;
......@@ -45,6 +58,9 @@ import org.apache.flink.runtime.messages.JobResult.JobCancelResult;
import org.apache.flink.runtime.messages.JobResult.JobSubmissionResult;
import org.apache.flink.runtime.messages.JobResult.JobProgressResult;
import org.apache.flink.util.StringUtils;
import scala.Function0;
import scala.concurrent.ExecutionContextExecutor;
import scala.concurrent.duration.Duration;
/**
* The job client is able to submit, control, and abort jobs.
......@@ -107,7 +123,9 @@ public class JobClient {
ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT);
final InetSocketAddress inetaddr = new InetSocketAddress(address, port);
this.jobManager = JobManager.getJobManager(inetaddr);
ActorSystem system = ActorSystem.create("JobClientActorSystem",
AkkaUtils.getDefaultActorSystemConfig());
this.jobManager = JobManager.getJobManager(inetaddr, system);
this.jobGraph = jobGraph;
this.configuration = configuration;
this.userCodeClassLoader = userCodeClassLoader;
......@@ -222,7 +240,9 @@ public class JobClient {
}
JobResult.JobProgressResult jobProgressResult = null;
LOG.info("Request job progress.");
jobProgressResult = getJobProgress();
LOG.info("Requested job progress.");
if (jobProgressResult == null) {
logErrorAndRethrow("Returned job progress is unexpectedly null!");
......@@ -232,6 +252,7 @@ public class JobClient {
logErrorAndRethrow("Could not retrieve job progress: " + jobProgressResult.description());
}
final Iterator<AbstractEvent> it = jobProgressResult.asJavaList().iterator();
while (it.hasNext()) {
......
......@@ -676,8 +676,8 @@ public class ExecutionGraph {
if(jobStatusListenerActors.size() > 0){
String message = error == null ? null : ExceptionUtils.stringifyException(error);
for(ActorRef listener: jobStatusListenerActors){
listener.tell(new JobStatusChanged(this, newState, message),
ActorRef.noSender());
listener.tell(new JobStatusChanged(jobID, newState, System.currentTimeMillis(),
message), ActorRef.noSender());
}
}
......@@ -708,7 +708,8 @@ public class ExecutionGraph {
String message = error == null ? null : ExceptionUtils.stringifyException(error);
for(ActorRef listener : executionListenerActors){
listener.tell(new ExecutionGraphMessages.ExecutionStateChanged(jobID, vertexId, subtask, executionID,
newExecutionState, message), ActorRef.noSender());
newExecutionState, System.currentTimeMillis(), message),
ActorRef.noSender());
}
}
......
......@@ -19,6 +19,7 @@
package org.apache.flink.runtime.io.network;
import java.io.IOException;
import java.io.Serializable;
import java.net.InetAddress;
import java.net.InetSocketAddress;
......@@ -29,7 +30,7 @@ import org.apache.flink.core.memory.DataOutputView;
/**
* Objects of this class uniquely identify a connection to a remote {@link org.apache.flink.runtime.taskmanager.TaskManager}.
*/
public final class RemoteReceiver implements IOReadableWritable {
public final class RemoteReceiver implements IOReadableWritable, Serializable {
/**
* The address of the connection to the remote TaskManager.
......
......@@ -35,8 +35,6 @@ import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.runtime.blob.BlobClient;
import org.apache.flink.runtime.blob.BlobKey;
......@@ -335,44 +333,6 @@ public class JobGraph implements Serializable {
this.numExecutionRetries = in.readInt();
out.writeInt(numExecutionRetries);
/**
* Writes the BLOB keys of the jar files required to run this job to the given {@link org.apache.flink.core.memory.DataOutputView}.
*
* @param out
* the data output to write the BLOB keys to
* @throws IOException
* thrown if an error occurs while writing to the data output
*/
private void writeJarBlobKeys(final DataOutputView out) throws IOException {
out.writeInt(this.userJarBlobKeys.size());
for (BlobKey userJarBlobKey : this.userJarBlobKeys) {
userJarBlobKey.write(out);
}
}
/**
* Reads the BLOB keys for the JAR files required to run this job and registers them.
*
* @param in
* the data stream to read the BLOB keys from
* @throws IOException
* thrown if an error occurs while reading the stream
*/
private void readJarBlobKeys(final DataInputView in) throws IOException {
// Do jar files follow;
final int numberOfBlobKeys = in.readInt();
for (int i = 0; i < numberOfBlobKeys; ++i) {
final BlobKey key = new BlobKey();
key.read(in);
this.userJarBlobKeys.add(key);
}
}
// --------------------------------------------------------------------------------------------
// Handling of attached JAR files
// --------------------------------------------------------------------------------------------
......
......@@ -70,16 +70,19 @@ public class AccumulatorManager {
}
}
/**
* Returns all collected accumulators for the job. For efficiency the
* internal accumulator is returned, so please use it read-only.
*/
public Map<String, Accumulator<?, ?>> getJobAccumulators(JobID jobID) {
JobAccumulators jobAccumulators = this.jobAccumulators.get(jobID);
if (jobAccumulators == null) {
return new HashMap<String, Accumulator<?, ?>>();
public Map<String, Object> getJobAccumulatorResults(JobID jobID) {
Map<String, Object> result = new HashMap<String, Object>();
JobAccumulators jobAccumulator = jobAccumulators.get(jobID);
if(jobAccumulator != null) {
for (Map.Entry<String, Accumulator<?, ?>> entry : jobAccumulators.get(jobID)
.getAccumulators().entrySet()) {
result.put(entry.getKey(), entry.getValue().getLocalValue());
}
}
return jobAccumulators.getAccumulators();
return result;
}
/**
......
......@@ -36,7 +36,7 @@ public class NoResourceAvailableException extends JobException {
+ ". You can decrease the operator parallelism or increase the number of slots per TaskManager in the configuration.");
}
NoResourceAvailableException(int numInstances, int numSlotsTotal) {
public NoResourceAvailableException(int numInstances, int numSlotsTotal) {
super(String.format("%s Resources available to scheduler: Number of instances=%d, total number of slots=%d",
BASE_MESSAGE, numInstances, numSlotsTotal));
}
......@@ -49,4 +49,17 @@ public class NoResourceAvailableException extends JobException {
public NoResourceAvailableException(String message) {
super(message);
}
@Override
public boolean equals(Object obj){
if(obj == null){
return false;
}
if(!(obj instanceof NoResourceAvailableException)){
return false;
}else{
return getMessage().equals(((NoResourceAvailableException)obj).getMessage());
}
}
}
......@@ -44,7 +44,7 @@ abstract public class FlinkMiniCluster {
protected List<ActorSystem> taskManagerActorSystems = new ArrayList<ActorSystem>();
protected List<ActorRef> taskManagerActors = new ArrayList<ActorRef>();
public abstract Configuration getConfiguration(final Configuration userConfiguration);
protected abstract Configuration generateConfiguration(final Configuration userConfiguration);
public abstract ActorRef startJobManager(final ActorSystem system, final Configuration configuration);
public abstract ActorRef startTaskManager(final ActorSystem system, final Configuration configuration,
......@@ -81,9 +81,9 @@ abstract public class FlinkMiniCluster {
// ------------------------------------------------------------------------
public void start(final Configuration configuration) throws Exception {
public void start(final Configuration configuration) {
Configuration clusterConfiguration = getConfiguration(configuration);
Configuration clusterConfiguration = generateConfiguration(configuration);
jobManagerActorSystem = startJobManagerActorSystem(clusterConfiguration);
jobManagerActor = startJobManager(jobManagerActorSystem, clusterConfiguration);
......@@ -101,22 +101,32 @@ abstract public class FlinkMiniCluster {
waitForTaskManagersToBeRegistered();
}
public void stop() throws Exception {
public void stop() {
LOG.info("Stopping FlinkMiniCluster.");
shutdown();
awaitTermination();
taskManagerActorSystems.clear();
taskManagerActors.clear();
}
protected void shutdown() {
for(ActorSystem system: taskManagerActorSystems){
system.shutdown();
}
jobManagerActorSystem.shutdown();
}
protected void awaitTermination() {
for(ActorSystem system: taskManagerActorSystems){
system.awaitTermination();
}
jobManagerActorSystem.awaitTermination();
taskManagerActorSystems.clear();
taskManagerActors.clear();
}
// ------------------------------------------------------------------------
// Network utility methods
// ------------------------------------------------------------------------
......
......@@ -20,13 +20,12 @@ package org.apache.flink.runtime.minicluster;
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import org.apache.flink.api.common.io.FileInputFormat;
import org.apache.flink.api.common.io.FileOutputFormat;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.GlobalConfiguration;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobmanager.JobManager;
import org.apache.flink.runtime.taskmanager.TaskManager;
import org.apache.flink.runtime.util.EnvironmentInformation;
......@@ -40,6 +39,7 @@ public class LocalFlinkMiniCluster extends FlinkMiniCluster {
private Configuration configuration;
private final String configDir;
private ActorSystem actorSystem;
public LocalFlinkMiniCluster(String configDir){
this.configDir = configDir;
......@@ -50,23 +50,53 @@ public class LocalFlinkMiniCluster extends FlinkMiniCluster {
// Life cycle and Job Submission
// ------------------------------------------------------------------------
public JobClient getJobClient(JobGraph jobGraph) throws Exception {
if(configuration == null){
throw new RuntimeException("The cluster has not been started yet.");
public ActorRef getJobClient() {
Configuration config = new Configuration();
config.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, HOSTNAME);
config.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, getJobManagerRPCPort());
return JobClient.startActorWithConfiguration(config, actorSystem);
}
public ActorSystem getJobClientActorSystem(){
return actorSystem;
}
@Override
public void start(Configuration configuration){
super.start(configuration);
actorSystem = AkkaUtils.createActorSystem();
}
@Override
protected void shutdown() {
super.shutdown();
if(actorSystem != null){
actorSystem.shutdown();
}
}
Configuration jobConfiguration = jobGraph.getJobConfiguration();
int jobManagerRPCPort = getJobManagerRPCPort();
jobConfiguration.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, HOSTNAME);
jobConfiguration.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, jobManagerRPCPort);
return new JobClient(jobGraph, jobConfiguration, getClass().getClassLoader());
@Override
protected void awaitTermination() {
if(actorSystem != null){
actorSystem.awaitTermination();
}
super.awaitTermination();
}
public int getJobManagerRPCPort() {
if(configuration == null){
throw new RuntimeException("Configuration has not been set.");
}
return configuration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, -1);
}
public Configuration getConfiguration(final Configuration userConfiguration) {
protected Configuration generateConfiguration(final Configuration userConfiguration) {
if(configuration == null){
String forkNumberString = System.getProperty("forkNumber");
int forkNumber = -1;
......@@ -97,7 +127,7 @@ public class LocalFlinkMiniCluster extends FlinkMiniCluster {
configuration.setInteger(ConfigConstants.TASK_MANAGER_DATA_PORT_KEY, taskManagerDATA);
}
initializeIOFormatClasses();
initializeIOFormatClasses(configuration);
}
return configuration;
......@@ -130,15 +160,12 @@ public class LocalFlinkMiniCluster extends FlinkMiniCluster {
return TaskManager.startActorWithConfiguration(HOSTNAME, config, false, system);
}
private static void initializeIOFormatClasses() {
private static void initializeIOFormatClasses(Configuration configuration) {
try {
Method im = FileInputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration");
im.setAccessible(true);
im.invoke(null);
Method om = FileOutputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration");
Method om = FileOutputFormat.class.getDeclaredMethod("initDefaultsFromConfiguration",
Configuration.class);
om.setAccessible(true);
om.invoke(null);
om.invoke(null, configuration);
}
catch (Exception e) {
LOG.error("Cannot (re) initialize the globally loaded defaults. Some classes might mot follow the specified default behavior.");
......
......@@ -411,7 +411,8 @@ public final class Task {
for(ActorRef listener: executionListenerActors){
listener.tell(new ExecutionGraphMessages.ExecutionStateChanged(jobId, vertexId, subtaskIndex,
executionId, newState, message), ActorRef.noSender());
executionId, newState, System.currentTimeMillis(), message),
ActorRef.noSender());
}
}
......
......@@ -27,13 +27,13 @@ import akka.util.Timeout
import com.typesafe.config.ConfigFactory
import org.apache.flink.configuration.{ConfigConstants, Configuration}
import org.apache.flink.core.io.IOReadableWritable
import org.apache.flink.runtime.akka.serialization.IOReadableWritableSerializer
import org.apache.flink.runtime.akka.serialization.{WritableSerializer,
IOReadableWritableSerializer}
import org.apache.hadoop.io.Writable
import scala.concurrent.{ExecutionContext, Future, Await}
import scala.concurrent.duration._
object AkkaUtils {
lazy val defaultActorSystem = ActorSystem.create("DefaultActorSystem",
ConfigFactory.parseString(getDefaultActorSystemConfigString))
implicit val FUTURE_TIMEOUT: Timeout = 1 minute
implicit val AWAIT_DURATION: FiniteDuration = 1 minute
implicit val FUTURE_DURATION: FiniteDuration = 1 minute
......@@ -47,6 +47,10 @@ object AkkaUtils {
actorSystem
}
def createActorSystem(): ActorSystem = {
ActorSystem.create("default", getDefaultActorSystemConfig)
}
def getConfigString(host: String, port: Int, configuration: Configuration): String = {
val transportHeartbeatInterval = configuration.getString(ConfigConstants.
AKKA_TRANSPORT_HEARTBEAT_INTERVAL,
......@@ -96,23 +100,29 @@ object AkkaUtils {
def getDefaultActorSystemConfigString: String = {
s"""akka.daemonic = on
|akka.loggers = ["akka.event.slf4j.Slf4jLogger"]
|akka.loglevel = "INFO"
|akka.loglevel = "WARNING"
|akka.logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
|akka.stdout-loglevel = "INFO"
|akka.stdout-loglevel = "WARNING"
|akka.jvm-exit-on-fatal-error = off
|akka.actor.provider = "akka.remote.RemoteActorRefProvider"
|akka.remote.netty.tcp.transport-class = "akka.remote.transport.netty.NettyTransport"
|akka.remote.netty.tcp.tcp-nodelay = on
|akka.log-config-on-start = off
|akka.remote.netty.tcp.port = 0
|akka.remote.netty.tcp.maximum-frame-size = 1MB
""".stripMargin
}
def getDefaultActorSystemConfig = {
ConfigFactory.parseString(getDefaultActorSystemConfigString)
}
def getChild(parent: ActorRef, child: String)(implicit system: ActorSystem): ActorRef = {
Await.result(system.actorSelection(parent.path / child).resolveOne(), AWAIT_DURATION)
}
def getReference(path: String): ActorRef = {
Await.result(defaultActorSystem.actorSelection(path).resolveOne(), AWAIT_DURATION)
def getReference(path: String)(implicit system: ActorSystem): ActorRef = {
Await.result(system.actorSelection(path).resolveOne(), AWAIT_DURATION)
}
@throws(classOf[IOException])
......
......@@ -16,46 +16,47 @@
* limitations under the License.
*/
package org.apache.flink.runtime.testingUtils
package org.apache.flink.runtime.akka.serialization
import akka.pattern.{ask, pipe}
import org.apache.flink.runtime.ActorLogMessages
import org.apache.flink.runtime.akka.AkkaUtils
import org.apache.flink.runtime.jobmanager.EventCollector
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.{ExecutionGraphNotFound, ExecutionGraphFound, RequestExecutionGraph}
import akka.serialization.JSerializer
import org.apache.flink.runtime.io.network.serialization.{DataInputDeserializer,
DataOutputSerializer}
import org.apache.flink.util.InstantiationUtil
import org.apache.hadoop.io.Writable
import scala.concurrent.Future
import scala.concurrent.duration._
class WritableSerializer extends JSerializer {
val INITIAL_BUFFER_SIZE = 8096
trait TestingEventCollector extends ActorLogMessages {
self: EventCollector =>
override protected def fromBinaryJava(bytes: Array[Byte], manifest: Class[_]): AnyRef = {
val in = new DataInputDeserializer(bytes, 0, bytes.length)
import context.dispatcher
import org.apache.flink.runtime.akka.AkkaUtils.FUTURE_TIMEOUT
val instance = InstantiationUtil.instantiate(manifest)
abstract override def receiveWithLogMessages: Receive = {
receiveTestingMessages orElse super.receiveWithLogMessages
if(!instance.isInstanceOf[Writable]){
throw new RuntimeException(s"Class $manifest is not of type IOReadableWritable.")
}
val writable = instance.asInstanceOf[Writable]
writable.readFields(in)
writable
}
def receiveTestingMessages: Receive = {
case RequestExecutionGraph(jobID) =>
recentExecutionGraphs.get(jobID) match {
case Some(executionGraph) => sender() ! ExecutionGraphFound(jobID, executionGraph)
case None =>
val responses = archiveListeners map {
listener =>
listener ? RequestExecutionGraph(jobID) filter {
case x: ExecutionGraphFound => true
case x: ExecutionGraphNotFound => false
}
}
val notFound = akka.pattern.after(200 millis, this.context.system.scheduler){
Future.successful{ExecutionGraphNotFound(jobID)}
}
Future firstCompletedOf(responses + notFound) pipeTo sender()
}
override def includeManifest: Boolean = true
override def toBinary(o: AnyRef): Array[Byte] = {
if(!o.isInstanceOf[Writable]){
throw new RuntimeException("Object is not of type Writable.")
}
val writable = o.asInstanceOf[Writable]
val out = new DataOutputSerializer(INITIAL_BUFFER_SIZE)
writable.write(out)
out.wrapAsByteBuffer().array()
}
override def identifier: Int = 1337
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.client
import java.io.IOException
import java.net.InetSocketAddress
import akka.actor.Status.Failure
import akka.actor._
import akka.pattern.ask
import org.apache.flink.api.common.JobExecutionResult
import org.apache.flink.configuration.{ConfigConstants, Configuration}
import org.apache.flink.runtime.ActorLogMessages
import org.apache.flink.runtime.akka.AkkaUtils
import org.apache.flink.runtime.jobgraph.JobGraph
import org.apache.flink.runtime.jobmanager.JobManager
import org.apache.flink.runtime.messages.ExecutionGraphMessages.ExecutionStateChanged
import org.apache.flink.runtime.messages.JobClientMessages.{SubmitJobDetached, SubmitJobAndWait}
import org.apache.flink.runtime.messages.JobManagerMessages._
import scala.concurrent.Await
import scala.concurrent.duration.Duration
class JobClient(jobManagerURL: String) extends Actor with ActorLogMessages with ActorLogging{
import context._
val jobManager = AkkaUtils.getReference(jobManagerURL)
override def receiveWithLogMessages: Receive = {
case SubmitJobDetached(jobGraph, listen) =>
jobManager.tell(SubmitJob(jobGraph, listenToEvents = listen, detach = true), sender())
case cancelJob: CancelJob =>
jobManager forward cancelJob
case SubmitJobAndWait(jobGraph, listen) =>
val listener = context.actorOf(Props(classOf[JobClientListener], sender()))
jobManager.tell(SubmitJob(jobGraph, listenToEvents = listen, detach = false), listener)
case RequestBlobManagerPort =>
jobManager forward RequestBlobManagerPort
}
}
class JobClientListener(client: ActorRef) extends Actor with ActorLogMessages with ActorLogging {
override def receiveWithLogMessages: Receive = {
case SubmissionFailure(_, t) =>
client ! Failure(t)
self ! PoisonPill
case JobResultSuccess(_, duration, accumulatorResults) =>
client ! new JobExecutionResult(duration, accumulatorResults)
self ! PoisonPill
case JobResultCanceled(_, msg) =>
client ! Failure(new JobExecutionException(msg, true))
self ! PoisonPill
case JobResultFailed(_, msg) =>
client ! Failure(new JobExecutionException(msg, false))
self ! PoisonPill
case e:ExecutionStateChanged =>
println(e.toString)
}
}
object JobClient{
val JOB_CLIENT_NAME = "jobclient"
def startActorSystemAndActor(config: Configuration): (ActorSystem, ActorRef) = {
implicit val actorSystem = AkkaUtils.createActorSystem(host = "localhost",
port =0, configuration = config)
(actorSystem, startActorWithConfiguration(config))
}
def startActor(jobManagerURL: String)(implicit actorSystem: ActorSystem): ActorRef = {
actorSystem.actorOf(Props(classOf[JobClient], jobManagerURL), JOB_CLIENT_NAME)
}
def parseConfiguration(configuration: Configuration): String = {
configuration.getString(ConfigConstants.JOB_MANAGER_AKKA_URL, null) match {
case url: String => url
case _ =>
val jobManagerAddress = configuration.getString(ConfigConstants
.JOB_MANAGER_IPC_ADDRESS_KEY, null);
val jobManagerRPCPort = configuration.getInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY,
ConfigConstants.DEFAULT_JOB_MANAGER_IPC_PORT);
if (jobManagerAddress == null) {
throw new RuntimeException("JobManager address has not been specified in the " +
"configuration.")
}
JobManager.getAkkaURL(jobManagerAddress + ":" + jobManagerRPCPort)
}
}
def startActorWithConfiguration(config: Configuration)(implicit actorSystem: ActorSystem):
ActorRef= {
startActor(parseConfiguration(config))
}
@throws(classOf[JobExecutionException])
def submitJobAndWait(jobGraph: JobGraph, listen: Boolean,
jobClient: ActorRef): JobExecutionResult = {
import AkkaUtils.FUTURE_TIMEOUT
val response = jobClient ? SubmitJobAndWait(jobGraph, listenToEvents = listen)
Await.result(response.mapTo[JobExecutionResult],Duration.Inf)
}
def submitJobDetached(jobGraph: JobGraph, listen: Boolean, jobClient: ActorRef): SubmissionResponse = {
import AkkaUtils.FUTURE_TIMEOUT
val response = jobClient ? SubmitJobDetached(jobGraph, listenToEvents = listen)
Await.result(response.mapTo[SubmissionResponse],AkkaUtils.FUTURE_DURATION)
}
@throws(classOf[IOException])
def uploadJarFiles(jobGraph: JobGraph, hostname: String, jobClient: ActorRef): Unit = {
val port = AkkaUtils.ask[Int](jobClient, RequestBlobManagerPort)
val serverAddress = new InetSocketAddress(hostname, port)
jobGraph.uploadRequiredJarFiles(serverAddress)
}
}
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmanager
import akka.actor.{Terminated, ActorRef, Actor, ActorLogging}
import akka.pattern.{ask, pipe}
import org.apache.flink.runtime.ActorLogMessages
import org.apache.flink.runtime.akka.AkkaUtils
import org.apache.flink.runtime.event.job._
import org.apache.flink.runtime.executiongraph._
import org.apache.flink.runtime.jobgraph.{JobStatus, JobID}
import org.apache.flink.runtime.messages.ArchiveMessages.{ArchiveExecutionGraph, ArchiveJobEvent,
ArchiveEvent}
import org.apache.flink.runtime.messages.EventCollectorMessages._
import org.apache.flink.runtime.messages.ExecutionGraphMessages.{CurrentJobStatus, JobNotFound,
JobStatusChanged, ExecutionStateChanged}
import org.apache.flink.runtime.messages.JobManagerMessages.RequestJobStatus
import org.apache.flink.runtime.messages.JobResult
import org.apache.flink.runtime.messages.JobResult.JobProgressResult
import scala.collection.convert.{WrapAsScala}
import scala.concurrent.Future
import scala.concurrent.duration._
class EventCollector(val timerTaskInterval: Int) extends Actor with ActorLogMessages with
ActorLogging with WrapAsScala {
import context.dispatcher
import AkkaUtils.FUTURE_TIMEOUT
val collectedEvents = collection.mutable.HashMap[JobID, List[AbstractEvent]]()
val recentJobs = collection.mutable.HashMap[JobID, RecentJobEvent]()
val recentExecutionGraphs = collection.mutable.HashMap[JobID, ExecutionGraph]()
val archiveListeners = collection.mutable.HashSet[ActorRef]()
val jobInformation = collection.mutable.HashMap[JobID, (String, Boolean, Long)]()
override def preStart(): Unit = {
startArchiveExpiredEvent()
}
override def postStop(): Unit = {
collectedEvents.clear()
recentJobs.clear()
recentExecutionGraphs.clear()
archiveListeners.clear()
jobInformation.clear()
}
def startArchiveExpiredEvent(): Unit = {
val schedulerDuration = FiniteDuration(2 * timerTaskInterval, SECONDS)
context.system.scheduler.schedule(schedulerDuration, schedulerDuration, self,
ArchiveExpiredEvents)
}
override def receiveWithLogMessages: Receive = {
case ArchiveExpiredEvents => {
val currentTime = System.currentTimeMillis()
collectedEvents.retain { (jobID, events) =>
val (outdatedElements, currentElements) = events.partition { event => event.getTimestamp +
timerTaskInterval < currentTime
}
outdatedElements foreach (archiveEvent(jobID, _))
currentElements.nonEmpty
}
recentJobs.retain { (jobID, recentJobEvent) =>
import JobStatus._
val status = recentJobEvent.getJobStatus
// only remove jobs which have stopped running
if ((status == FINISHED || status == CANCELED || status == FAILED) &&
recentJobEvent.getTimestamp + timerTaskInterval < currentTime) {
archiveJobEvent(jobID, recentJobEvent)
archiveExecutionGraph(jobID, recentExecutionGraphs.remove(jobID).get)
jobInformation.remove(jobID)
false
} else {
true
}
}
}
case RequestJobProgress(jobID) => {
sender() ! JobProgressResult(JobResult.SUCCESS, null, collectedEvents.getOrElse(jobID,
List()))
}
case RequestRecentJobEvents => {
sender() ! RecentJobs(recentJobs.values.toList)
}
case RegisterJob(executionGraph, profilingAvailable, submissionTimestamp) => {
val jobID = executionGraph.getJobID
executionGraph.registerExecutionListener(self)
executionGraph.registerJobStatusListener(self)
jobInformation += jobID ->(executionGraph.getJobName, profilingAvailable, submissionTimestamp)
recentExecutionGraphs += jobID -> executionGraph
}
case ExecutionStateChanged(jobID, vertexID, subtask, executionID, newExecutionState,
optionalMessage) => {
val timestamp = System.currentTimeMillis()
recentExecutionGraphs.get(jobID) match {
case Some(graph) =>
val vertex = graph.getJobVertex(vertexID)
val taskName = if (vertex != null) vertex.getJobVertex.getName else "(null)"
val totalNumberOfSubtasks = if (vertex != null) vertex.getParallelism else -1
val vertexEvent = new VertexEvent(timestamp, vertexID, taskName, totalNumberOfSubtasks,
subtask, executionID, newExecutionState, optionalMessage)
val events = collectedEvents.getOrElse(jobID, List())
val executionStateChangeEvent = new ExecutionStateChangeEvent(timestamp, vertexID,
subtask, executionID, newExecutionState)
collectedEvents += jobID -> (executionStateChangeEvent :: vertexEvent :: events)
case None =>
log.warning(s"Could not find execution graph with jobID ${jobID}.")
}
}
case JobStatusChanged(executionGraph, newJobStatus, optionalMessage) => {
val jobID = executionGraph.getJobID()
if (newJobStatus == JobStatus.RUNNING) {
this.recentExecutionGraphs += jobID -> executionGraph
}
val currentTime = System.currentTimeMillis()
val (jobName, isProfilingEnabled, submissionTimestamp) = jobInformation(jobID)
recentJobs.put(jobID, new RecentJobEvent(jobID, jobName, newJobStatus, isProfilingEnabled,
submissionTimestamp, currentTime))
val events = collectedEvents.getOrElse(jobID, List())
collectedEvents += jobID -> ((new JobEvent(currentTime, newJobStatus,
optionalMessage)) :: events)
}
case ProcessProfilingEvent(profilingEvent) => {
val events = collectedEvents.getOrElse(profilingEvent.getJobID, List())
collectedEvents += profilingEvent.getJobID -> (profilingEvent :: events)
}
case RegisterArchiveListener(actorListener) => {
context.watch(actorListener)
archiveListeners += actorListener
}
case Terminated(terminatedListener) => {
archiveListeners -= terminatedListener
}
case RequestJobStatus(jobID) => {
recentJobs.get(jobID) match {
case Some(recentJobEvent) => sender() ! CurrentJobStatus(jobID, recentJobEvent.getJobStatus)
case None =>
val responses = archiveListeners map { archivist => archivist ?
RequestJobStatus(jobID) filter {
case _: CurrentJobStatus => true
case _ => false
}
}
val noResponse = akka.pattern.after(AkkaUtils.FUTURE_DURATION, context.system.scheduler) {
Future.successful(JobNotFound(jobID))
}
Future.firstCompletedOf(responses + noResponse) pipeTo sender()
}
}
}
private def archiveEvent(jobID: JobID, event: AbstractEvent): Unit = {
for (listener <- archiveListeners) {
listener ! ArchiveEvent(jobID, event)
}
}
private def archiveJobEvent(jobID: JobID, event: RecentJobEvent): Unit = {
for (listener <- archiveListeners) {
listener ! ArchiveJobEvent(jobID, event)
}
}
private def archiveExecutionGraph(jobID: JobID, graph: ExecutionGraph): Unit = {
for (listener <- archiveListeners) {
listener ! ArchiveExecutionGraph(jobID, graph)
}
}
}
......@@ -16,45 +16,23 @@
* limitations under the License.
*/
package org.apache.flink.runtime.messages
package org.apache.flink.runtime.jobmanager
import akka.actor.ActorRef
import org.apache.flink.runtime.event.job.{AbstractEvent, RecentJobEvent}
import org.apache.flink.runtime.executiongraph.ExecutionGraph
import org.apache.flink.runtime.jobgraph.JobID
import org.apache.flink.runtime.profiling.types.ProfilingEvent
import scala.collection.convert.{WrapAsScala, DecorateAsJava}
object EventCollectorMessages extends DecorateAsJava with WrapAsScala {
case class ProcessProfilingEvent(profilingEvent: ProfilingEvent)
case class RegisterArchiveListener(listener: ActorRef)
case class RequestJobProgress(jobID: JobID)
case class RegisterJob(executionGraph: ExecutionGraph, profilingAvailable: Boolean,
submissionTimestamp: Long)
case class RecentJobs(jobs: List[RecentJobEvent]) {
def this(_jobs: java.util.List[RecentJobEvent]) = {
this(_jobs.toList)
}
def asJavaList: java.util.List[RecentJobEvent] = {
jobs.asJava
}
}
case class JobEvents(jobs: List[AbstractEvent]) {
def asJavaList: java.util.List[AbstractEvent] = {
jobs.asJava
}
}
case object ArchiveExpiredEvents
case object RequestRecentJobEvents
}
class JobInfo(val client: ActorRef, val start: Long){
var end: Long = -1
var detach: Boolean = false
def duration: Long = {
if(end != -1){
(end - start)/1000
}else{
-1
}
}
}
object JobInfo{
def apply(client: ActorRef, start: Long) = new JobInfo(client, start)
}
......@@ -26,35 +26,31 @@ import akka.pattern.Patterns
import com.google.common.base.Preconditions
import org.apache.flink.configuration.{ConfigConstants, GlobalConfiguration, Configuration}
import org.apache.flink.core.io.InputSplitAssigner
import org.apache.flink.runtime.accumulators.AccumulatorEvent
import org.apache.flink.runtime.blob.BlobServer
import org.apache.flink.runtime.executiongraph.{ExecutionJobVertex, ExecutionGraph}
import org.apache.flink.runtime.io.network.ConnectionInfoLookupResponse
import org.apache.flink.runtime.messages.ExecutionGraphMessages.{JobNotFound, CurrentJobStatus,
import org.apache.flink.runtime.messages.ArchiveMessages.ArchiveExecutionGraph
import org.apache.flink.runtime.messages.ExecutionGraphMessages.{CurrentJobStatus,
JobStatusChanged}
import org.apache.flink.runtime.messages.JobResult
import org.apache.flink.runtime.messages.JobResult.{JobCancelResult, JobSubmissionResult}
import org.apache.flink.runtime.{JobException, ActorLogMessages}
import org.apache.flink.runtime.akka.AkkaUtils
import org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager
import org.apache.flink.runtime.executiongraph.{ExecutionJobVertex, ExecutionGraph}
import org.apache.flink.runtime.instance.{InstanceManager}
import org.apache.flink.runtime.jobgraph.{JobStatus, JobID}
import org.apache.flink.runtime.jobmanager.accumulators.AccumulatorManager
import org.apache.flink.runtime.jobmanager.scheduler.{Scheduler => FlinkScheduler}
import org.apache.flink.runtime.messages.EventCollectorMessages._
import org.apache.flink.runtime.messages.JobManagerMessages._
import org.apache.flink.runtime.messages.RegistrationMessages._
import org.apache.flink.runtime.messages.TaskManagerMessages.{NextInputSplit, Heartbeat}
import org.apache.flink.runtime.profiling.ProfilingUtils
import org.apache.flink.util.StringUtils
import org.slf4j.LoggerFactory
import scala.collection.convert.WrapAsScala
import scala.concurrent.Future
import scala.concurrent.duration._
class JobManager(val archiveCount: Int, val profiling: Boolean, val recommendedPollingInterval:
Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLogging with WrapAsScala {
class JobManager(val archiveCount: Int, val profiling: Boolean, cleanupInterval: Long) extends
Actor with ActorLogMessages with ActorLogging with WrapAsScala {
import context._
......@@ -62,8 +58,6 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
def archiveProps: Props = Props(classOf[MemoryArchivist], archiveCount)
def eventCollectorProps: Props = Props(classOf[EventCollector], recommendedPollingInterval)
val profiler = profiling match {
case true => Some(context.actorOf(profilerProps, JobManager.PROFILER_NAME))
case false => None
......@@ -71,7 +65,6 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
// will be removed
val archive = context.actorOf(archiveProps, JobManager.ARCHIVE_NAME)
val eventCollector = context.actorOf(eventCollectorProps, JobManager.EVENT_COLLECTOR_NAME)
val accumulatorManager = new AccumulatorManager(Math.min(1, archiveCount))
val instanceManager = new InstanceManager()
......@@ -79,10 +72,8 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
val libraryCacheManager = new BlobLibraryCacheManager(new BlobServer(), cleanupInterval)
val webserver = null
val currentJobs = scala.collection.concurrent.TrieMap[JobID, ExecutionGraph]()
val jobTerminationListener = scala.collection.mutable.HashMap[JobID, Set[ActorRef]]()
eventCollector ! RegisterArchiveListener(archive)
val currentJobs = scala.collection.mutable.HashMap[JobID, (ExecutionGraph, JobInfo)]()
val finalJobStatusListener = scala.collection.mutable.HashMap[JobID, Set[ActorRef]]()
instanceManager.addInstanceListener(scheduler)
......@@ -112,12 +103,11 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
sender() ! instanceManager.getTotalNumberOfSlots
}
case SubmitJob(jobGraph) => {
var executionGraph: ExecutionGraph = null
case SubmitJob(jobGraph, listenToEvents, detach) => {
try {
if (jobGraph == null) {
JobSubmissionResult(JobResult.ERROR, "Submitted job is null.")
sender() ! akka.actor.Status.Failure(new IllegalArgumentException("JobGraph must not be" +
" null."))
} else {
log.info(s"Received job ${jobGraph.getJobID} (${jobGraph.getName}}).")
......@@ -125,9 +115,10 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
// Create the user code class loader
libraryCacheManager.register(jobGraph.getJobID, jobGraph.getUserJarBlobKeys)
executionGraph = currentJobs.getOrElseUpdate(jobGraph.getJobID(),
new ExecutionGraph(jobGraph.getJobID,
jobGraph.getName, jobGraph.getJobConfiguration, jobGraph.getUserJarBlobKeys))
val (executionGraph, jobInfo) = currentJobs.getOrElseUpdate(jobGraph.getJobID(),
(new ExecutionGraph(jobGraph.getJobID, jobGraph.getName,
jobGraph.getJobConfiguration, jobGraph.getUserJarBlobKeys), JobInfo(sender(),
System.currentTimeMillis())))
val userCodeLoader = libraryCacheManager.getClassLoader(jobGraph.getJobID)
......@@ -163,33 +154,43 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
// closed iterations)
executionGraph.setQueuedSchedulingAllowed(jobGraph.getAllowQueuedScheduling)
eventCollector ! RegisterJob(executionGraph, false, System.currentTimeMillis())
executionGraph.registerJobStatusListener(self)
if(listenToEvents){
executionGraph.registerExecutionListener(sender())
}
jobInfo.detach = detach
log.info(s"Scheduling job ${jobGraph.getName}.")
executionGraph.scheduleForExecution(scheduler)
sender() ! JobSubmissionResult(JobResult.SUCCESS, null)
sender() ! SubmissionSuccess(jobGraph.getJobID)
}
} catch {
case t: Throwable =>
log.error(t, "Job submission failed.")
if(executionGraph != null){
executionGraph.fail(t)
val status = Patterns.ask(self, RequestJobStatusWhenTerminated, 10 second)
status.onFailure{
case _: Throwable => self ! JobStatusChanged(executionGraph, JobStatus.FAILED,
s"Cleanup job ${jobGraph.getJobID}.")
}
}else {
libraryCacheManager.unregister(jobGraph.getJobID)
currentJobs.remove(jobGraph.getJobID)
currentJobs.get(jobGraph.getJobID) match {
case Some((executionGraph, jobInfo)) =>
executionGraph.fail(t)
// don't send the client the final job status
jobInfo.detach = true
val status = Patterns.ask(self, RequestFinalJobStatus(jobGraph.getJobID), 10 second)
status.onFailure{
case _: Throwable => self ! JobStatusChanged(executionGraph.getJobID,
JobStatus.FAILED, System.currentTimeMillis(), s"Cleanup job ${jobGraph.getJobID}.")
}
case None =>
libraryCacheManager.unregister(jobGraph.getJobID)
currentJobs.remove(jobGraph.getJobID)
}
sender() ! JobSubmissionResult(JobResult.ERROR, StringUtils.stringifyException(t))
sender() ! SubmissionFailure(jobGraph.getJobID, t)
}
}
......@@ -197,14 +198,15 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
log.info(s"Trying to cancel job with ID ${jobID}.")
currentJobs.get(jobID) match {
case Some(executionGraph) =>
case Some((executionGraph, _)) =>
Future {
executionGraph.cancel()
}
sender() ! JobCancelResult(JobResult.SUCCESS, null)
sender() ! CancellationSuccess(jobID)
case None =>
log.info(s"No job found with ID ${jobID}.")
sender() ! JobCancelResult(JobResult.ERROR, s"Cannot find job with ID ${jobID}")
sender() ! CancellationFailure(jobID, new IllegalArgumentException(s"No job found with " +
s"ID ${jobID}."))
}
}
......@@ -212,7 +214,7 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
Preconditions.checkNotNull(taskExecutionState)
currentJobs.get(taskExecutionState.getJobID) match {
case Some(executionGraph) => sender() ! executionGraph.updateState(taskExecutionState)
case Some((executionGraph, _)) => sender() ! executionGraph.updateState(taskExecutionState)
case None => log.error(s"Cannot find execution graph for ID ${taskExecutionState
.getJobID} to change state to" +
s" ${taskExecutionState.getExecutionState}.")
......@@ -222,7 +224,7 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
case RequestNextInputSplit(jobID, vertexID) => {
val nextInputSplit = currentJobs.get(jobID) match {
case Some(executionGraph) => executionGraph.getJobVertex(vertexID) match {
case Some((executionGraph,_)) => executionGraph.getJobVertex(vertexID) match {
case vertex: ExecutionJobVertex => vertex.getSplitAssigner match {
case splitAssigner: InputSplitAssigner => splitAssigner.getNextInputSplit(null)
case _ =>
......@@ -241,34 +243,54 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
sender() ! NextInputSplit(nextInputSplit)
}
case JobStatusChanged(executionGraph, newJobStatus, optionalMessage) => {
val jobID = executionGraph.getJobID
log.info(s"Status of job ${jobID} (${executionGraph.getJobName}) changed to " +
s"${newJobStatus}${optionalMessage}.")
case JobStatusChanged(jobID, newJobStatus, timeStamp, optionalMessage) => {
currentJobs.get(jobID) match {
case Some((executionGraph, jobInfo)) => executionGraph.getJobName
log.info(s"Status of job ${jobID} (${executionGraph.getJobName}) changed to " +
s"${newJobStatus}${optionalMessage}.")
if(newJobStatus.isTerminalState) {
jobInfo.end = timeStamp
if(!jobInfo.detach) {
newJobStatus match {
case JobStatus.FINISHED =>
val accumulatorResults = accumulatorManager.getJobAccumulatorResults(jobID)
jobInfo.client ! JobResultSuccess(jobID, jobInfo.duration, accumulatorResults)
case JobStatus.CANCELED =>
jobInfo.client ! JobResultCanceled(jobID, optionalMessage)
case JobStatus.FAILED =>
jobInfo.client ! JobResultFailed(jobID, optionalMessage)
case x => throw new IllegalArgumentException(s"$x is not a terminal state.")
}
}
if (Set(JobStatus.FINISHED, JobStatus.CANCELED, JobStatus.FAILED) contains newJobStatus) {
// send final job status to job termination listeners
jobTerminationListener.get(jobID) foreach {
listeners =>
listeners foreach {
_ ! CurrentJobStatus(jobID, newJobStatus)
finalJobStatusListener.get(jobID) foreach {
_ foreach {
_ ! CurrentJobStatus(jobID, newJobStatus)
}
}
}
currentJobs.remove(jobID)
try {
libraryCacheManager.unregister(jobID)
} catch {
case t: Throwable =>
log.error(t, s"Could not properly unregister job ${jobID} form the library cache.")
}
removeJob(jobID)
}
case None =>
removeJob(jobID)
}
}
case RequestFinalJobStatus(jobID) => {
currentJobs.get(jobID) match {
case Some(_) =>
val listeners = finalJobStatusListener.getOrElse(jobID, Set())
finalJobStatusListener += jobID -> (listeners + sender())
case None =>
archive ! RequestJobStatus(jobID)
}
}
case LookupConnectionInformation(connectionInformation, jobID, sourceChannelID) => {
currentJobs.get(jobID) match {
case Some(executionGraph) =>
case Some((executionGraph, _)) =>
sender() ! ConnectionInformation(executionGraph.lookupConnectionInfoAndDeployReceivers
(connectionInformation, sourceChannelID))
case None =>
......@@ -283,53 +305,26 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
(libraryCacheManager.getClassLoader(accumulatorEvent.getJobID)))
}
case RequestAccumulatorResult(jobID) => {
sender() ! new AccumulatorEvent(jobID, accumulatorManager.getJobAccumulators(jobID))
}
case RegisterJobStatusListener(jobID) => {
currentJobs.get(jobID) match {
case Some(executionGraph) =>
executionGraph.registerJobStatusListener(sender())
sender() ! CurrentJobStatus(jobID, executionGraph.getState)
case None =>
log.warning(s"There is no running job with job ID ${jobID}.")
sender() ! JobNotFound(jobID)
}
}
case RequestJobStatusWhenTerminated(jobID) => {
if (currentJobs.contains(jobID)) {
val listeners = jobTerminationListener.getOrElse(jobID, Set())
jobTerminationListener += jobID -> (listeners + sender())
} else {
eventCollector.tell(RequestJobStatus(jobID), sender())
}
}
case RequestJobStatus(jobID) => {
currentJobs.get(jobID) match {
case Some(executionGraph) => sender() ! CurrentJobStatus(jobID, executionGraph.getState)
case None => eventCollector.tell(RequestJobStatus(jobID), sender())
case Some((executionGraph,_)) => sender() ! CurrentJobStatus(jobID, executionGraph.getState)
case None => archive.tell(RequestJobStatus(jobID), sender())
}
}
case RequestRecentJobEvents => {
eventCollector.tell(RequestRecentJobEvents, sender())
}
case msg: RequestJobProgress => {
eventCollector forward msg
}
case RequestRunningJobs => {
val runningJobs = currentJobs map {
case (jobID, (eg, jobInfo)) =>
RunningJob(jobID, eg.getState, eg.getStatusTimestamp(eg.getState), eg.getJobName)
}
sender() ! RunningJobsResponse(runningJobs.toSeq)
}
case RequestBlobManagerPort => {
sender() ! libraryCacheManager.getBlobServerPort
}
case RequestPollingInterval => {
sender() ! recommendedPollingInterval
}
case Heartbeat(instanceID) => {
instanceManager.reportHeartBeat(instanceID)
}
......@@ -340,6 +335,20 @@ Int, cleanupInterval: Long) extends Actor with ActorLogMessages with ActorLoggin
context.unwatch(taskManager)
}
}
private def removeJob(jobID: JobID): Unit = {
currentJobs.remove(jobID) match {
case Some((eg, _)) => archive ! ArchiveExecutionGraph(jobID, eg)
case None =>
}
try {
libraryCacheManager.unregister(jobID)
} catch {
case t: Throwable =>
log.error(t, s"Could not properly unregister job ${jobID} form the library cache.")
}
}
}
object JobManager {
......@@ -360,9 +369,8 @@ object JobManager {
def initialize(args: Array[String]): (String, Int, Configuration) = {
val parser = new scopt.OptionParser[JobManagerCLIConfiguration]("jobmanager") {
head("flink jobmanager")
opt[String]("configDir") action { (x, c) =>
c.copy(configDir = x)
} text ("Specify configuration directory.")
opt[String]("configDir") action { (x, c) => c.copy(configDir = x) } text ("Specify " +
"configuration directory.")
}
parser.parse(args, JobManagerCLIConfiguration()) map {
......@@ -390,19 +398,16 @@ object JobManager {
(actorSystem, (startActor _).tupled(parseConfiguration(configuration)))
}
def parseConfiguration(configuration: Configuration): (Int, Boolean, Int, Long) = {
def parseConfiguration(configuration: Configuration): (Int, Boolean, Long) = {
val archiveCount = configuration.getInteger(ConfigConstants.JOB_MANAGER_WEB_ARCHIVE_COUNT,
ConfigConstants.DEFAULT_JOB_MANAGER_WEB_ARCHIVE_COUNT)
val profilingEnabled = configuration.getBoolean(ProfilingUtils.PROFILE_JOB_KEY, true)
val recommendedPollingInterval = configuration.getInteger(ConfigConstants
.JOBCLIENT_POLLING_INTERVAL_KEY,
ConfigConstants.DEFAULT_JOBCLIENT_POLLING_INTERVAL)
val cleanupInterval = configuration.getLong(ConfigConstants
.LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL,
ConfigConstants.DEFAULT_LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL) * 1000
(archiveCount, profilingEnabled, recommendedPollingInterval, cleanupInterval)
(archiveCount, profilingEnabled, cleanupInterval)
}
def startActorWithConfiguration(configuration: Configuration)(implicit actorSystem:
......@@ -410,12 +415,10 @@ object JobManager {
(startActor _).tupled(parseConfiguration(configuration))
}
def startActor(archiveCount: Int, profilingEnabled: Boolean, recommendedPollingInterval: Int,
cleanupInterval: Long)
def startActor(archiveCount: Int, profilingEnabled: Boolean, cleanupInterval: Long)
(implicit actorSystem:
ActorSystem): ActorRef = {
actorSystem.actorOf(Props(classOf[JobManager], archiveCount, profilingEnabled,
recommendedPollingInterval, cleanupInterval),
actorSystem.actorOf(Props(classOf[JobManager], archiveCount, profilingEnabled, cleanupInterval),
JOB_MANAGER_NAME)
}
......@@ -435,7 +438,7 @@ object JobManager {
AkkaUtils.getChild(jobManager, ARCHIVE_NAME)
}
def getJobManager(address: InetSocketAddress): ActorRef = {
def getJobManager(address: InetSocketAddress)(implicit system: ActorSystem): ActorRef = {
AkkaUtils.getReference(getAkkaURL(address.getHostName + ":" + address.getPort))
}
}
......@@ -32,17 +32,6 @@ import scala.collection.mutable.ListBuffer
class MemoryArchivist(private val max_entries: Int) extends Actor with ActorLogMessages with
ActorLogging with DecorateAsJava {
/**
* The map which stores all collected events until they are either
* fetched by the client or discarded.
*/
val collectedEvents = collection.mutable.HashMap[JobID, ListBuffer[AbstractEvent]]()
/**
* Map of recently started jobs with the time stamp of the last received job event.
*/
val oldJobs = collection.mutable.HashMap[JobID, RecentJobEvent]()
/**
* Map of execution graphs belonging to recently started jobs with the time stamp of the last
* received job event.
......@@ -53,30 +42,10 @@ ActorLogging with DecorateAsJava {
val lru = collection.mutable.Queue[JobID]()
override def receiveWithLogMessages: Receive = {
case ArchiveEvent(jobID, event) => {
val list = collectedEvents.getOrElseUpdate(jobID, ListBuffer())
list += event
cleanup(jobID)
}
case ArchiveJobEvent(jobID, event) => {
oldJobs.update(jobID, event)
cleanup(jobID)
}
case ArchiveExecutionGraph(jobID, graph) => {
graphs.update(jobID, graph)
cleanup(jobID)
}
case RequestJobStatus(jobID) => {
val response = oldJobs get jobID match {
case Some(recentJobEvent) => CurrentJobStatus(jobID, recentJobEvent.getJobStatus)
case None => JobNotFound(jobID)
}
sender() ! response
}
}
def cleanup(jobID: JobID): Unit = {
......@@ -86,8 +55,6 @@ ActorLogging with DecorateAsJava {
while (lru.size > max_entries) {
val removedJobID = lru.dequeue()
collectedEvents.remove(removedJobID)
oldJobs.remove(removedJobID)
graphs.remove(removedJobID)
}
}
......
......@@ -16,9 +16,8 @@
* limitations under the License.
*/
package org.apache.flink.runtime;
public enum ExecutionMode {
LOCAL, CLUSTER
}
package org.apache.flink.runtime.jobmanager
import org.apache.flink.runtime.jobgraph.{JobID, JobStatus}
case class RunningJob(jobID: JobID, jobStatus: JobStatus, timestamp: Long, jobName: String)
......@@ -23,7 +23,5 @@ import org.apache.flink.runtime.executiongraph.ExecutionGraph
import org.apache.flink.runtime.jobgraph.JobID
object ArchiveMessages {
case class ArchiveEvent(jobID: JobID, event: AbstractEvent)
case class ArchiveJobEvent(jobID: JobID, event: RecentJobEvent)
case class ArchiveExecutionGraph(jobID: JobID, graph: ExecutionGraph)
}
......@@ -26,7 +26,8 @@ object ExecutionGraphMessages {
case class ExecutionStateChanged(jobID: JobID, vertexID: JobVertexID, subtask: Int,
executionID: ExecutionAttemptID,
newExecutionState: ExecutionState, optionalMessage: String)
newExecutionState: ExecutionState,
timestamp: Long, optionalMessage: String)
sealed trait JobStatusResponse {
......@@ -35,9 +36,7 @@ object ExecutionGraphMessages {
case class CurrentJobStatus(jobID: JobID, status: JobStatus) extends JobStatusResponse
case class JobNotFound(jobID: JobID) extends JobStatusResponse
case class JobStatusChanged(executionGraph: ExecutionGraph, newJobStatus: JobStatus,
optionalMessage: String){
def jobID = executionGraph.getJobID
}
case class JobStatusChanged(jobID: JobID, newJobStatus: JobStatus, timestamp: Long,
optionalMessage: String)
}
......@@ -18,24 +18,9 @@
package org.apache.flink.runtime.messages
import org.apache.flink.runtime.event.job.AbstractEvent
import scala.collection.convert.DecorateAsJava
object JobResult extends Enumeration with DecorateAsJava {
type JobResult = Value
val SUCCESS, ERROR = Value
case class JobProgressResult(returnCode: JobResult, description: String,
events: List[AbstractEvent]) {
def asJavaList: java.util.List[AbstractEvent] = {
events.asJava
}
}
case class JobCancelResult(returnCode: JobResult, description: String)
case class JobSubmissionResult(returnCode: JobResult, description: String)
import org.apache.flink.runtime.jobgraph.JobGraph
object JobClientMessages {
case class SubmitJobAndWait(jobGraph: JobGraph, listenToEvents: Boolean = false)
case class SubmitJobDetached(jobGraph: JobGraph, listenToEvents: Boolean = false)
}
......@@ -23,12 +23,15 @@ import org.apache.flink.runtime.instance.InstanceConnectionInfo
import org.apache.flink.runtime.io.network.ConnectionInfoLookupResponse
import org.apache.flink.runtime.io.network.channels.ChannelID
import org.apache.flink.runtime.jobgraph.{JobVertexID, JobID, JobGraph}
import org.apache.flink.runtime.profiling.impl.types.ProfilingDataContainer
import org.apache.flink.runtime.jobmanager.RunningJob
import org.apache.flink.runtime.taskmanager.TaskExecutionState
import scala.collection.convert.{WrapAsScala, WrapAsJava}
object JobManagerMessages {
case class SubmitJob(jobGraph: JobGraph)
case class SubmitJob(jobGraph: JobGraph, listenToEvents: Boolean = false,
detach: Boolean = false)
case class CancelJob(jobID: JobID)
......@@ -43,12 +46,6 @@ object JobManagerMessages {
case class ReportAccumulatorResult(accumulatorEvent: AccumulatorEvent)
case class RequestAccumulatorResult(jobID: JobID)
case class RegisterJobStatusListener(jobID: JobID)
case class RequestJobStatusWhenTerminated(jobID: JobID)
case class RequestJobStatus(jobID: JobID)
case object RequestInstances
......@@ -57,8 +54,43 @@ object JobManagerMessages {
case object RequestAvailableSlots
case object RequestPollingInterval
case object RequestBlobManagerPort
case class RequestFinalJobStatus(jobID: JobID)
sealed trait JobResult{
def jobID: JobID
}
case class JobResultSuccess(jobID: JobID, runtime: Long, accumulatorResults: java.util.Map[String,
AnyRef]) extends JobResult {}
case class JobResultCanceled(jobID: JobID, msg: String)
case class JobResultFailed(jobID: JobID, msg:String)
sealed trait SubmissionResponse{
def jobID: JobID
}
case class SubmissionSuccess(jobID: JobID) extends SubmissionResponse
case class SubmissionFailure(jobID: JobID, cause: Throwable) extends SubmissionResponse
sealed trait CancellationResponse{
def jobID: JobID
}
case class CancellationSuccess(jobID: JobID) extends CancellationResponse
case class CancellationFailure(jobID: JobID, cause: Throwable) extends CancellationResponse
case object RequestRunningJobs
case class RunningJobsResponse(runningJobs: Seq[RunningJob]) {
def this() = this(Seq())
def asJavaList: java.util.List[RunningJob] = {
import scala.collection.JavaConversions.seqAsJavaList
seqAsJavaList(runningJobs)
}
}
}
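Taken together these messages form the new submission protocol between client and JobManager. A minimal sketch of the attached exchange, mirroring the expectations in the ITCases further below (jm, jobGraph and the TestKit expect* helpers are assumed to be in scope):
// hedged sketch: attached submission as seen from the client side
jm ! SubmitJob(jobGraph)
expectMsg(SubmissionSuccess(jobGraph.getJobID))  // job accepted by the JobManager
expectMsgType[JobResultSuccess]                  // final result once the job reaches a terminal state
// a detached submission, SubmitJob(jobGraph, detach = true), skips the final JobResult message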
......@@ -117,7 +117,8 @@ ActorLogMessages with ActorLogging {
}
}
case ExecutionStateChanged(_, vertexID, subtaskIndex, executionID, newExecutionState, _) => {
case ExecutionStateChanged(_, vertexID, subtaskIndex, executionID, newExecutionState,
_, _) => {
import ExecutionState._
environments.get(executionID) match {
......
......@@ -27,6 +27,7 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.flink.runtime.testutils.CommonTestUtils;
import org.apache.flink.runtime.testutils.ServerTestUtils;
import org.apache.flink.util.StringUtils;
import org.junit.Test;
......@@ -55,14 +56,14 @@ public final class BlobKeyTest {
}
/**
* Tests the serialization/deserialization of BLOB keys using the regular {@link org.apache.flink.core.io.IOReadableWritable} API.
* Tests the serialization/deserialization of BLOB keys
*/
@Test
public void testSerialization() {
final BlobKey k1 = new BlobKey(KEY_ARRAY_1);
final BlobKey k2;
try {
k2 = ServerTestUtils.createCopy(k1);
k2 = CommonTestUtils.createCopySerializable(k1);
} catch (IOException ioe) {
fail(StringUtils.stringifyException(ioe));
return;
......
......@@ -23,20 +23,20 @@
</encoder>
</appender>
<root level="DEBUG">
<root level="WARN">
<appender-ref ref="STDOUT"/>
</root>
<!-- The following loggers are disabled during tests, because many tests deliberately
throw error to test failing scenarios. Logging those would overflow the log. -->
<!---->
<!--<logger name="org.apache.flink.runtime.operators.DataSinkTask" level="OFF"/>-->
<!--<logger name="org.apache.flink.runtime.operators.RegularPactTask" level="OFF"/>-->
<!--<logger name="org.apache.flink.runtime.client.JobClient" level="OFF"/>-->
<!--<logger name="org.apache.flink.runtime.taskmanager.Task" level="OFF"/>-->
<!--<logger name="org.apache.flink.runtime.jobmanager.JobManager" level="OFF"/>-->
<!--<logger name="org.apache.flink.runtime.testingUtils" level="OFF"/>-->
<!--<logger name="org.apache.flink.runtime.executiongraph.ExecutionGraph" level="OFF"/>-->
<!--<logger name="org.apache.flink.runtime.jobmanager.EventCollector" level="OFF"/>-->
<!--<logger name="org.apache.flink.runtime.instance.InstanceManager" level="OFF"/>-->
<logger name="org.apache.flink.runtime.operators.DataSinkTask" level="OFF"/>
<logger name="org.apache.flink.runtime.operators.RegularPactTask" level="OFF"/>
<logger name="org.apache.flink.runtime.client.JobClient" level="OFF"/>
<logger name="org.apache.flink.runtime.taskmanager.Task" level="OFF"/>
<logger name="org.apache.flink.runtime.jobmanager.JobManager" level="OFF"/>
<logger name="org.apache.flink.runtime.testingUtils" level="OFF"/>
<logger name="org.apache.flink.runtime.executiongraph.ExecutionGraph" level="OFF"/>
<logger name="org.apache.flink.runtime.jobmanager.EventCollector" level="OFF"/>
<logger name="org.apache.flink.runtime.instance.InstanceManager" level="OFF"/>
</configuration>
\ No newline at end of file
......@@ -19,16 +19,13 @@
package org.apache.flink.runtime.jobmanager
import akka.actor.ActorSystem
import akka.actor.Status.Success
import akka.testkit.{ImplicitSender, TestKit}
import org.apache.flink.runtime.jobgraph.{JobStatus, JobGraph, DistributionPattern,
import org.apache.flink.runtime.jobgraph.{JobGraph, DistributionPattern,
AbstractJobVertex}
import org.apache.flink.runtime.jobmanager.Tasks.{Receiver, Sender}
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup
import org.apache.flink.runtime.messages.ExecutionGraphMessages.CurrentJobStatus
import org.apache.flink.runtime.messages.JobManagerMessages.{RequestJobStatusWhenTerminated,
SubmitJob}
import org.apache.flink.runtime.messages.JobResult
import org.apache.flink.runtime.messages.JobResult.JobSubmissionResult
import org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob
import org.apache.flink.runtime.testingUtils.TestingUtils
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike}
import scala.collection.convert.WrapAsJava
......@@ -71,10 +68,8 @@ ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll with WrapA
try {
within(1 second) {
jm ! SubmitJob(jobGraph)
expectMsg(JobSubmissionResult(JobResult.SUCCESS, null))
jm ! RequestJobStatusWhenTerminated(jobGraph.getJobID)
expectMsg(CurrentJobStatus(jobGraph.getJobID, JobStatus.FINISHED))
expectMsg(Success(_))
expectMsg()
}
} finally {
cluster.stop()
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmanager
import akka.actor.ActorSystem
import akka.testkit.{ImplicitSender, TestKit}
import org.apache.flink.runtime.execution.librarycache.LibraryCacheManager
import org.apache.flink.runtime.jobgraph.{JobStatus, JobGraph, DistributionPattern,
import org.apache.flink.runtime.jobgraph.{JobGraph, DistributionPattern,
AbstractJobVertex}
import org.apache.flink.runtime.jobmanager.Tasks.{AgnosticBinaryReceiver, Receiver}
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup
import org.apache.flink.runtime.messages.ExecutionGraphMessages.CurrentJobStatus
import org.apache.flink.runtime.messages.JobManagerMessages.{RequestJobStatusWhenTerminated,
import org.apache.flink.runtime.messages.JobManagerMessages.{JobResultSuccess, SubmissionSuccess,
SubmitJob}
import org.apache.flink.runtime.messages.JobResult
import org.apache.flink.runtime.messages.JobResult.JobSubmissionResult
import org.apache.flink.runtime.taskmanager.TaskManagerTest.Sender
import org.apache.flink.runtime.testingUtils.TestingUtils
import org.scalatest.{Matchers, WordSpecLike, BeforeAndAfterAll}
......@@ -70,10 +66,10 @@ WordSpecLike with Matchers with BeforeAndAfterAll {
try {
within(1 second) {
jm ! SubmitJob(jobGraph)
expectMsg(new JobSubmissionResult(JobResult.SUCCESS, null))
expectMsg(SubmissionSuccess(jobGraph.getJobID))
expectMsgType[JobResultSuccess]
jm ! RequestJobStatusWhenTerminated(jobGraph.getJobID)
expectMsg(CurrentJobStatus(jobGraph.getJobID, JobStatus.FINISHED))
expectNoMsg()
}
} finally {
cluster.stop()
......@@ -114,10 +110,10 @@ WordSpecLike with Matchers with BeforeAndAfterAll {
try {
within(1 second) {
jm ! SubmitJob(jobGraph)
expectMsg(JobSubmissionResult(JobResult.SUCCESS, null))
expectMsg(SubmissionSuccess(jobGraph.getJobID))
expectMsgType[JobResultSuccess]
jm ! RequestJobStatusWhenTerminated(jobGraph.getJobID)
expectMsg(CurrentJobStatus(jobGraph.getJobID, JobStatus.FINISHED))
expectNoMsg()
}
} finally {
cluster.stop()
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmanager
import akka.actor.{PoisonPill, ActorSystem}
import akka.testkit.{ImplicitSender, TestKit}
import org.apache.flink.runtime.jobgraph.{JobStatus, JobGraph, DistributionPattern,
import org.apache.flink.runtime.jobgraph.{JobGraph, DistributionPattern,
AbstractJobVertex}
import org.apache.flink.runtime.jobmanager.Tasks.{BlockingReceiver, Sender}
import org.apache.flink.runtime.messages.ExecutionGraphMessages.CurrentJobStatus
import org.apache.flink.runtime.messages.JobManagerMessages.{RequestJobStatusWhenTerminated,
import org.apache.flink.runtime.messages.JobManagerMessages.{JobResultFailed, SubmissionSuccess,
SubmitJob}
import org.apache.flink.runtime.messages.JobResult
import org.apache.flink.runtime.messages.JobResult.JobSubmissionResult
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.{AllVerticesRunning,
WaitForAllVerticesToBeRunning}
import org.apache.flink.runtime.testingUtils.TestingUtils
......@@ -65,18 +62,16 @@ with WordSpecLike with Matchers with BeforeAndAfterAll {
try {
within(1 second) {
jm ! SubmitJob(jobGraph)
expectMsg(new JobSubmissionResult(JobResult.SUCCESS, null))
expectMsg(SubmissionSuccess(jobGraph.getJobID))
jm ! WaitForAllVerticesToBeRunning(jobID)
expectMsg(AllVerticesRunning(jobID))
// kill one task manager
taskManagers.get(0) ! PoisonPill
expectMsgType[JobResultFailed]
jm ! RequestJobStatusWhenTerminated(jobGraph.getJobID)
expectMsg(CurrentJobStatus(jobID, JobStatus.FAILED))
expectNoMsg()
}
}finally{
cluster.stop()
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmanager
import akka.actor.{PoisonPill, ActorSystem}
import akka.testkit.{ImplicitSender, TestKit}
import org.apache.flink.runtime.jobgraph.{JobStatus, JobGraph, DistributionPattern,
import org.apache.flink.runtime.jobgraph.{JobGraph, DistributionPattern,
AbstractJobVertex}
import org.apache.flink.runtime.jobmanager.Tasks.{Sender, BlockingReceiver}
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup
import org.apache.flink.runtime.messages.ExecutionGraphMessages.CurrentJobStatus
import org.apache.flink.runtime.messages.JobManagerMessages.{RequestJobStatusWhenTerminated,
import org.apache.flink.runtime.messages.JobManagerMessages.{JobResultFailed, SubmissionSuccess,
SubmitJob}
import org.apache.flink.runtime.messages.JobResult
import org.apache.flink.runtime.messages.JobResult.JobSubmissionResult
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.{AllVerticesRunning,
WaitForAllVerticesToBeRunning}
import org.apache.flink.runtime.testingUtils.TestingUtils
......@@ -73,7 +70,7 @@ ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll {
try{
within(1 second) {
jm ! SubmitJob(jobGraph)
expectMsg(new JobSubmissionResult(JobResult.SUCCESS, null))
expectMsg(SubmissionSuccess(jobGraph.getJobID))
jm ! WaitForAllVerticesToBeRunning(jobID)
expectMsg(AllVerticesRunning(jobID))
......@@ -81,9 +78,9 @@ ImplicitSender with WordSpecLike with Matchers with BeforeAndAfterAll {
//kill task manager
taskManagers.get(0) ! PoisonPill
jm ! RequestJobStatusWhenTerminated(jobGraph.getJobID)
expectMsgType[JobResultFailed]
expectMsg(CurrentJobStatus(jobID, JobStatus.FAILED))
expectNoMsg()
}
}finally{
cluster.stop()
......
......@@ -27,7 +27,7 @@ import org.apache.flink.runtime.net.NetUtils
import org.apache.flink.runtime.taskmanager.TaskManager
class TestingCluster extends FlinkMiniCluster {
override def getConfiguration(userConfig: Configuration): Configuration = {
override def generateConfiguration(userConfig: Configuration): Configuration = {
val cfg = new Configuration()
cfg.setString(ConfigConstants.JOB_MANAGER_IPC_ADDRESS_KEY, "localhost")
cfg.setInteger(ConfigConstants.JOB_MANAGER_IPC_PORT_KEY, NetUtils.getAvailablePort)
......@@ -38,11 +38,9 @@ class TestingCluster extends FlinkMiniCluster {
}
override def startJobManager(system: ActorSystem, config: Configuration) = {
val (archiveCount, profiling, recommendedPollingInterval, cleanupInterval) =
JobManager.parseConfiguration(config)
val (archiveCount, profiling, cleanupInterval) = JobManager.parseConfiguration(config)
system.actorOf(Props(new JobManager(archiveCount, profiling, recommendedPollingInterval,
cleanupInterval) with
system.actorOf(Props(new JobManager(archiveCount, profiling, cleanupInterval) with
TestingJobManager),
JobManager.JOB_MANAGER_NAME)
}
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.testingUtils
......@@ -22,9 +22,8 @@ import akka.actor.{ActorRef, Props}
import org.apache.flink.runtime.ActorLogMessages
import org.apache.flink.runtime.execution.ExecutionState
import org.apache.flink.runtime.jobgraph.JobID
import org.apache.flink.runtime.jobmanager.{EventCollector, JobManager, MemoryArchivist}
import org.apache.flink.runtime.jobmanager.{JobManager, MemoryArchivist}
import org.apache.flink.runtime.messages.ExecutionGraphMessages.ExecutionStateChanged
import org.apache.flink.runtime.messages.JobManagerMessages.UpdateTaskExecutionState
import org.apache.flink.runtime.testingUtils.TestingJobManagerMessages.{AllVerticesRunning,
WaitForAllVerticesToBeRunning, ExecutionGraphFound, RequestExecutionGraph}
......@@ -38,9 +37,6 @@ trait TestingJobManager extends ActorLogMessages with WrapAsScala {
override def archiveProps = Props(new MemoryArchivist(archiveCount) with TestingMemoryArchivist)
override def eventCollectorProps = Props(new EventCollector(recommendedPollingInterval) with
TestingEventCollector)
abstract override def receiveWithLogMessages: Receive = {
receiveTestingMessages orElse super.receiveWithLogMessages
}
......@@ -48,21 +44,22 @@ trait TestingJobManager extends ActorLogMessages with WrapAsScala {
def receiveTestingMessages: Receive = {
case RequestExecutionGraph(jobID) =>
currentJobs.get(jobID) match {
case Some(executionGraph) => sender() ! ExecutionGraphFound(jobID, executionGraph)
case None => eventCollector.tell(RequestExecutionGraph(jobID), sender())
case Some((executionGraph, jobInfo)) => sender() ! ExecutionGraphFound(jobID,
executionGraph)
case None => archive.tell(RequestExecutionGraph(jobID), sender())
}
case WaitForAllVerticesToBeRunning(jobID) =>
if(checkIfAllVerticesRunning(jobID)){
sender() ! AllVerticesRunning(jobID)
}else{
currentJobs.get(jobID) match {
case Some(eg) => eg.registerExecutionListener(self)
case Some((eg, _)) => eg.registerExecutionListener(self)
case None =>
}
val waiting = waitForAllVerticesToBeRunning.getOrElse(jobID, Set[ActorRef]())
waitForAllVerticesToBeRunning += jobID -> (waiting + sender())
}
case ExecutionStateChanged(jobID, _, _, _, _, _) =>
case ExecutionStateChanged(jobID, _, _, _, _, _, _) =>
val cleanup = waitForAllVerticesToBeRunning.get(jobID) match {
case Some(listeners) if checkIfAllVerticesRunning(jobID) =>
for(listener <- listeners){
......@@ -80,7 +77,7 @@ trait TestingJobManager extends ActorLogMessages with WrapAsScala {
def checkIfAllVerticesRunning(jobID: JobID): Boolean = {
currentJobs.get(jobID) match {
case Some(eg) =>
case Some((eg, _)) =>
eg.getAllExecutionVertices.forall( _.getExecutionState == ExecutionState.RUNNING)
case None => false
}
......
......@@ -40,9 +40,9 @@ object TestingUtils {
s"""akka.daemonic = on
|akka.loggers = ["akka.event.slf4j.Slf4jLogger"]
|akka.loglevel = "INFO"
|akka.loglevel = "WARNING"
|akka.logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
|akka.stdout-loglevel = "INFO"
|akka.stdout-loglevel = "WARNING"
|akka.jvm-exit-on-fatal-error = off
|akka.log-config-on-start = off
|akka.actor.serializers {
......
......@@ -60,7 +60,7 @@ public abstract class AbstractTestBase {
protected final Configuration config;
protected LocalFlinkMiniCluster executor;
private final List<File> tempFiles;
protected int taskManagerNumSlots = DEFAULT_TASK_MANAGER_NUM_SLOTS;
......@@ -83,6 +83,7 @@ public abstract class AbstractTestBase {
// --------------------------------------------------------------------------------------------
public void startCluster() throws Exception {
Thread.sleep(250);
this.executor = new LocalFlinkMiniCluster(null);
Configuration config = new Configuration();
config.setBoolean(ConfigConstants.FILESYSTEM_DEFAULT_OVERWRITE_KEY, true);
......
......@@ -20,6 +20,7 @@ package org.apache.flink.test.util;
import java.util.Comparator;
import akka.actor.ActorRef;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.Plan;
import org.apache.flink.compiler.DataStatistics;
......@@ -194,7 +195,7 @@ public abstract class JavaProgramTestBase extends AbstractTestBase {
private static final class TestEnvironment extends ExecutionEnvironment {
private final LocalFlinkMiniCluster executor;
private JobExecutionResult latestResult;
......@@ -211,10 +212,9 @@ public abstract class JavaProgramTestBase extends AbstractTestBase {
NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
JobGraph jobGraph = jgg.compileJobGraph(op);
JobClient client = this.executor.getJobClient(jobGraph);
client.setConsoleStreamForReporting(AbstractTestBase.getNullPrintStream());
JobExecutionResult result = client.submitJobAndWait();
ActorRef client = this.executor.getJobClient();
JobExecutionResult result = JobClient.submitJobAndWait(jobGraph, false, client);
this.latestResult = result;
return result;
}
......
......@@ -19,6 +19,7 @@
package org.apache.flink.test.util;
import akka.actor.ActorRef;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.Plan;
import org.apache.flink.compiler.DataStatistics;
......@@ -119,9 +120,8 @@ public abstract class RecordAPITestBase extends AbstractTestBase {
Assert.assertNotNull("Obtained null JobGraph", jobGraph);
try {
JobClient client = this.executor.getJobClient(jobGraph);
client.setConsoleStreamForReporting(getNullPrintStream());
this.jobExecutionResult = client.submitJobAndWait();
ActorRef client = this.executor.getJobClient();
this.jobExecutionResult = JobClient.submitJobAndWait(jobGraph, false, client);
}
catch(Exception e) {
System.err.println(e.getMessage());
......
......@@ -224,7 +224,7 @@ public class AccumulatorITCase extends JavaProgramTestBase {
/**
* Custom accumulator
*/
public static class SetAccumulator<T extends IOReadableWritable> implements Accumulator<T, Set<T>> {
public static class SetAccumulator<T extends IOReadableWritable> implements Accumulator<T, SerializableHashSet<T>> {
private static final long serialVersionUID = 1L;
......@@ -236,7 +236,7 @@ public class AccumulatorITCase extends JavaProgramTestBase {
}
@Override
public Set<T> getLocalValue() {
public SerializableHashSet<T> getLocalValue() {
return this.set;
}
......@@ -246,7 +246,7 @@ public class AccumulatorITCase extends JavaProgramTestBase {
}
@Override
public void merge(Accumulator<T, Set<T>> other) {
public void merge(Accumulator<T, SerializableHashSet<T>> other) {
// build union
this.set.addAll(((SetAccumulator<T>) other).getLocalValue());
}
......@@ -262,7 +262,7 @@ public class AccumulatorITCase extends JavaProgramTestBase {
}
@Override
public Accumulator<T, Set<T>> clone() {
public Accumulator<T, SerializableHashSet<T>> clone() {
SetAccumulator<T> result = new SetAccumulator<T>();
result.set.addAll(set);
......
......@@ -19,14 +19,19 @@
package org.apache.flink.test.cancelling;
import java.util.Iterator;
import java.util.concurrent.TimeUnit;
import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import akka.dispatch.ExecutionContexts;
import akka.pattern.Patterns;
import com.amazonaws.http.ExecutionContext;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.messages.JobResult;
import org.apache.flink.runtime.messages.JobResult.JobProgressResult;
import org.apache.flink.runtime.messages.JobResult.JobSubmissionResult;
import org.apache.flink.runtime.messages.JobResult.JobCancelResult;
import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.client.JobExecutionException;
import org.apache.flink.runtime.messages.JobClientMessages;
import org.apache.flink.runtime.messages.JobManagerMessages;
import org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster;
import org.junit.Assert;
......@@ -37,15 +42,14 @@ import org.apache.flink.compiler.DataStatistics;
import org.apache.flink.compiler.PactCompiler;
import org.apache.flink.compiler.plan.OptimizedPlan;
import org.apache.flink.compiler.plantranslate.NepheleJobGraphGenerator;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.event.job.AbstractEvent;
import org.apache.flink.runtime.event.job.JobEvent;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.util.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.junit.After;
import org.junit.Before;
import scala.concurrent.Await;
import scala.concurrent.Future;
import scala.concurrent.duration.FiniteDuration;
/**
*
......@@ -108,124 +112,37 @@ public abstract class CancellingTestBase {
try {
// submit job
final JobGraph jobGraph = getJobGraph(plan);

final long startingTime = System.currentTimeMillis();
long cancelTime = -1L;

final JobClient client = this.executor.getJobClient(jobGraph);
final JobSubmissionResult submissionResult = client.submitJob();
if (submissionResult.returnCode() != JobResult.SUCCESS()) {
    throw new IllegalStateException(submissionResult.description());
}

final int interval = client.getRecommendedPollingInterval();
final long sleep = interval * 1000L;

Thread.sleep(sleep / 2);

long lastProcessedEventSequenceNumber = -1L;

while (true) {
    if (Thread.interrupted()) {
        throw new IllegalStateException("Job client has been interrupted");
    }

    final long now = System.currentTimeMillis();

    if (cancelTime < 0L) {
        // Cancel job
        if (startingTime + msecsTillCanceling < now) {
            LOG.info("Issuing cancel request");

            final JobCancelResult jcr = client.cancelJob();
            if (jcr == null) {
                throw new IllegalStateException("Return value of cancelJob is null!");
            }
            if (jcr.returnCode() != JobResult.SUCCESS()) {
                throw new IllegalStateException(jcr.description());
            }

            // Save when the cancel request has been issued
            cancelTime = now;
        }
    } else {
        // Job has already been canceled
        if (cancelTime + maxTimeTillCanceled < now) {
            throw new IllegalStateException("Cancelling of job took " + (now - cancelTime)
                + " milliseconds, only " + maxTimeTillCanceled + " milliseconds are allowed");
        }
    }

    final JobProgressResult jobProgressResult = client.getJobProgress();
    if (jobProgressResult == null) {
        throw new IllegalStateException("Returned job progress is unexpectedly null!");
    }
    if (jobProgressResult.returnCode() == JobResult.ERROR()) {
        throw new IllegalStateException("Could not retrieve job progress: "
            + jobProgressResult.description());
    }

    boolean exitLoop = false;

    final Iterator<AbstractEvent> it = jobProgressResult.asJavaList().iterator();
    while (it.hasNext()) {
        final AbstractEvent event = it.next();

        // Did we already process that event?
        if (lastProcessedEventSequenceNumber >= event.getSequenceNumber()) {
            continue;
        }
        lastProcessedEventSequenceNumber = event.getSequenceNumber();

        // Check if we can exit the loop
        if (event instanceof JobEvent) {
            final JobEvent jobEvent = (JobEvent) event;
            final JobStatus jobStatus = jobEvent.getCurrentJobStatus();

            switch (jobStatus) {
            case FINISHED:
                throw new IllegalStateException("Job finished successfully");
            case FAILED:
                throw new IllegalStateException("Job failed");
            case CANCELED:
                exitLoop = true;
                break;
            case RUNNING:
            case CANCELLING:
            case FAILING:
            case CREATED:
                break;
            case RESTARTING:
                throw new IllegalStateException("Job restarted");
            }
        }

        if (exitLoop) {
            break;
        }
    }

    if (exitLoop) {
        break;
    }

    Thread.sleep(sleep);
}
} catch (Exception e) {
    LOG.error("Exception while running runAndCancelJob.", e);
    Assert.fail(StringUtils.stringifyException(e));
}

final ActorRef client = this.executor.getJobClient();
final ActorSystem actorSystem = executor.getJobClientActorSystem();
boolean jobSuccessfullyCancelled = false;

Future<Object> result = Patterns.ask(client, new JobClientMessages.SubmitJobAndWait(jobGraph, false),
        AkkaUtils.FUTURE_TIMEOUT());

actorSystem.scheduler().scheduleOnce(new FiniteDuration(msecsTillCanceling, TimeUnit.MILLISECONDS),
        client, new JobManagerMessages.CancelJob(jobGraph.getJobID()),
        actorSystem.dispatcher(), ActorRef.noSender());

try {
    Await.result(result, AkkaUtils.AWAIT_DURATION());
} catch (JobExecutionException exception) {
    if (!exception.isJobCanceledByUser()) {
        throw new IllegalStateException("Job Failed.");
    }

    jobSuccessfullyCancelled = true;
}

if (!jobSuccessfullyCancelled) {
    throw new IllegalStateException("Job was not successfully cancelled.");
}
} catch (Exception e) {
    LOG.error("Exception found in runAndCancelJob.", e);
    Assert.fail(StringUtils.stringifyException(e));
}
}
private JobGraph getJobGraph(final Plan plan) throws Exception {
......
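For readers unfamiliar with the Akka calls in the new test body, the submit-and-cancel flow reduces to the sketch below. The message classes, the ActorRef obtained from the test cluster, and the AkkaUtils timeout constants are taken from this diff; the wrapping class, the import paths of the Flink classes, and the meaning of the boolean flag passed to SubmitJobAndWait are assumptions made for illustration, not part of the commit.

import java.util.concurrent.TimeUnit;

import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import akka.pattern.Patterns;
import scala.concurrent.Await;
import scala.concurrent.Future;
import scala.concurrent.duration.FiniteDuration;

import org.apache.flink.runtime.akka.AkkaUtils;
import org.apache.flink.runtime.client.JobExecutionException;
import org.apache.flink.runtime.jobgraph.JobGraph;
import org.apache.flink.runtime.messages.JobClientMessages;
import org.apache.flink.runtime.messages.JobManagerMessages;

public final class SubmitAndCancelSketch {

    /** Submits a job via the job client actor and cancels it after a delay; expects a user cancellation. */
    static void submitAndCancel(ActorSystem actorSystem, ActorRef jobClient, JobGraph jobGraph,
            long msecsTillCanceling) throws Exception {
        // Ask the job client actor to run the job; the future completes when the job terminates.
        Future<Object> result = Patterns.ask(jobClient,
                new JobClientMessages.SubmitJobAndWait(jobGraph, false), // flag assumed to suppress console updates
                AkkaUtils.FUTURE_TIMEOUT());

        // Schedule the cancel request independently of the thread that waits on the result.
        actorSystem.scheduler().scheduleOnce(
                new FiniteDuration(msecsTillCanceling, TimeUnit.MILLISECONDS),
                jobClient,
                new JobManagerMessages.CancelJob(jobGraph.getJobID()),
                actorSystem.dispatcher(),
                ActorRef.noSender());

        // Block until the job terminates; a user-triggered cancellation surfaces as a
        // JobExecutionException whose isJobCanceledByUser() returns true.
        try {
            Await.result(result, AkkaUtils.AWAIT_DURATION());
        } catch (JobExecutionException e) {
            if (!e.isJobCanceledByUser()) {
                throw e;
            }
        }
    }

    private SubmitAndCancelSketch() {}
}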
......@@ -45,12 +45,6 @@ public class JobGraphUtils {
private JobGraphUtils() {}
public static void submit(JobGraph graph, Configuration nepheleConfig) throws IOException, JobExecutionException {
JobClient client = new JobClient(graph, nepheleConfig, JobGraphUtils.class.getClassLoader());
client.submitJobAndWait();
}
public static <T extends FileInputFormat<?>> InputFormatVertex createInput(T stub, String path, String name, JobGraph graph,
int degreeOfParallelism)
{
......
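This hunk removes the blocking submit helper that wrapped the old JobClient. Call sites presumably now go through the actor-based client directly; the sketch below is a hypothetical drop-in replacement, assuming the static JobClient.submitJobAndWait(JobGraph, boolean, ActorRef) helper that appears later in this commit and assuming the import paths shown.

import akka.actor.ActorRef;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.jobgraph.JobGraph;

public final class JobSubmissionSketch {

    /** Hypothetical replacement for the removed JobGraphUtils.submit: blocks until the job finishes. */
    public static void submit(JobGraph graph, ActorRef jobClientActor) throws Exception {
        // The boolean flag mirrors the test bases touched in this commit; its exact meaning is assumed.
        JobClient.submitJobAndWait(graph, false, jobClientActor);
    }

    private JobSubmissionSketch() {}
}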
......@@ -194,7 +194,7 @@ public class WordCountAccumulators implements Program, ProgramDescription {
/**
* Custom accumulator
*/
public static class SetAccumulator<T extends Value> implements Accumulator<T, Set<T>> {
public static class SetAccumulator<T extends Value> implements Accumulator<T, SerializableHashSet<T>> {
private static final long serialVersionUID = 1L;
......@@ -206,7 +206,7 @@ public class WordCountAccumulators implements Program, ProgramDescription {
}
@Override
public Set<T> getLocalValue() {
public SerializableHashSet<T> getLocalValue() {
return this.set;
}
......@@ -216,7 +216,7 @@ public class WordCountAccumulators implements Program, ProgramDescription {
}
@Override
public void merge(Accumulator<T, Set<T>> other) {
public void merge(Accumulator<T, SerializableHashSet<T>> other) {
// build union
this.set.addAll(((SetAccumulator<T>) other).getLocalValue());
}
......@@ -232,7 +232,7 @@ public class WordCountAccumulators implements Program, ProgramDescription {
}
@Override
public Accumulator<T, Set<T>> clone() {
public Accumulator<T, SerializableHashSet<T>> clone() {
SetAccumulator<T> result = new SetAccumulator<T>();
result.set.addAll(set);
......
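The accumulator change narrows the result type from the Set interface to the concrete SerializableHashSet, presumably so the runtime can rely on the reported value being serializable when it is shipped back. The standalone sketch below illustrates that general principle with plain Java serialization; the Result holder and its field names are invented for the example and are not part of the Flink API.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.HashSet;

public final class SerializableResultSketch {

    /** Holder whose field is declared with a concrete serializable type rather than an interface. */
    static final class Result implements Serializable {
        private static final long serialVersionUID = 1L;
        // Declaring HashSet (serializable) instead of Set guarantees the value can cross the wire,
        // which is the same guarantee the narrowed accumulator signature buys.
        final HashSet<String> values = new HashSet<String>();
    }

    public static void main(String[] args) throws Exception {
        Result original = new Result();
        original.values.add("word");

        // Round-trip through Java serialization to show the concrete type survives it.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream out = new ObjectOutputStream(bos);
        out.writeObject(original);
        out.close();

        ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray()));
        Result copy = (Result) in.readObject();
        in.close();

        System.out.println(copy.values); // prints [word]
    }

    private SerializableResultSketch() {}
}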
......@@ -18,6 +18,7 @@
package org.apache.flink.test.util;
import akka.actor.ActorRef;
import org.apache.flink.runtime.minicluster.LocalFlinkMiniCluster;
import org.junit.Assert;
......@@ -142,11 +143,11 @@ public abstract class FailingTestBase extends RecordAPITestBase {
*/
@Override
public void run() {
ActorRef client = this.executor.getJobClient();
try {
// submit failing job
JobClient client = this.executor.getJobClient(this.failingJob);
client.setConsoleStreamForReporting(AbstractTestBase.getNullPrintStream());
client.submitJobAndWait();
JobClient.submitJobAndWait(this.failingJob, false, client);
this.error = new Exception("The job did not fail.");
} catch(JobExecutionException jee) {
......@@ -158,9 +159,7 @@ public abstract class FailingTestBase extends RecordAPITestBase {
try {
// submit working job
JobClient client = this.executor.getJobClient(this.job);
client.setConsoleStreamForReporting(AbstractTestBase.getNullPrintStream());
client.submitJobAndWait();
JobClient.submitJobAndWait(this.job, false, client);
} catch (Exception e) {
this.error = e;
}
......
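The updated test thread submits both jobs through the static JobClient.submitJobAndWait helper against the job client ActorRef: the first job is expected to fail with a JobExecutionException, the second must then run to completion. Reduced to its essentials, and treating the surrounding harness (executor, error field) and the Flink import paths as assumptions, the pattern looks like this:

import akka.actor.ActorRef;
import org.apache.flink.runtime.client.JobClient;
import org.apache.flink.runtime.client.JobExecutionException;
import org.apache.flink.runtime.jobgraph.JobGraph;

public final class ExpectedFailureSketch {

    /** Runs a job that must fail, then a job that must succeed; returns null if both behave as expected. */
    static Exception runExpectingFailure(ActorRef jobClient, JobGraph failingJob, JobGraph workingJob) {
        try {
            // The failing job has to terminate with a JobExecutionException.
            JobClient.submitJobAndWait(failingJob, false, jobClient);
            return new Exception("The job did not fail.");
        } catch (JobExecutionException expected) {
            // expected path: fall through and run the working job
        } catch (Exception e) {
            return e;
        }

        try {
            // The follow-up job must complete normally on the same cluster.
            JobClient.submitJobAndWait(workingJob, false, jobClient);
            return null;
        } catch (Exception e) {
            return e;
        }
    }

    private ExpectedFailureSketch() {}
}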
......@@ -17,7 +17,7 @@
################################################################################
# Set root logger level to DEBUG and its only appender to A1.
log4j.rootLogger=DEBUG, A1
log4j.rootLogger=WARN, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
......
......@@ -23,7 +23,7 @@
</encoder>
</appender>
<root level="DEBUG">
<root level="WARN">
<appender-ref ref="STDOUT"/>
</root>
......
......@@ -448,6 +448,18 @@ under the License.
</profile>
</profiles>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
<exclusions>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
</exclusions>
<reporting>
<plugins>
<!-- execution of Unit Tests -->
......