diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs index a3845cf0e8e7e3fa61c64febc8235281e30b7803..649b16f17a9298387c1e707f66acbffdce90f661 100644 --- a/src/librustc_trans/back/write.rs +++ b/src/librustc_trans/back/write.rs @@ -764,7 +764,7 @@ pub fn start_async_translation(sess: &Session, }); let (shared_emitter, shared_emitter_main) = SharedEmitter::new(); - let (trans_worker_send, _trans_worker_receive) = channel(); + let (trans_worker_send, trans_worker_receive) = channel(); let (coordinator_send, coordinator_receive) = channel(); let coordinator_thread = start_executing_work(sess, @@ -792,6 +792,7 @@ pub fn start_async_translation(sess: &Session, time_graph, output_filenames: crate_output.clone(), coordinator_send, + trans_worker_receive, shared_emitter_main, future: coordinator_thread } @@ -987,7 +988,7 @@ pub fn dump_incremental_data(trans: &CrateTranslation) { eprintln!("incremental: re-using {} out of {} modules", reuse, trans.modules.len()); } -pub struct WorkItem { +struct WorkItem { mtrans: ModuleTranslation, config: ModuleConfig, output_names: OutputFilenames @@ -1074,9 +1075,11 @@ enum Message { result: Result, worker_id: usize, }, - WorkItem(WorkItem), - CheckErrorMessages, - TranslationDone, + TranslationDone { + llvm_work_item: WorkItem, + is_last: bool + }, + TranslateItem, } struct Diagnostic { @@ -1085,6 +1088,13 @@ struct Diagnostic { lvl: Level, } +#[derive(PartialEq, Clone, Copy, Debug)] +enum TransWorkerState { + Idle, + Translating, + LLVMing, +} + fn start_executing_work(sess: &Session, shared_emitter: SharedEmitter, trans_worker_send: Sender, @@ -1189,7 +1199,6 @@ fn start_executing_work(sess: &Session, // Before that work finishes, however, we may acquire a token. In that case // we actually wastefully acquired the token, so we relinquish it back to // the jobserver. - thread::spawn(move || { let mut worker_id_counter = 0; let mut free_worker_ids = Vec::new(); @@ -1211,13 +1220,74 @@ fn start_executing_work(sess: &Session, let mut work_items = Vec::new(); let mut tokens = Vec::new(); + let mut trans_worker_state = TransWorkerState::Idle; let mut running = 0; - while !translation_done || work_items.len() > 0 || running > 0 { + while !translation_done || + work_items.len() > 0 || + running > 0 || + trans_worker_state != TransWorkerState::Idle { + + if !translation_done { + if trans_worker_state == TransWorkerState::Idle { + // Translation is not done yet, so there are two things the + // translation worker could do: + // + // (1) Translate another CGU + // (2) Run an already translated CGU through LLVM + // + // Option (2) makes sense if there's already enough work for + // all the other workers. In that case it's better to run + // a CGU through LLVM, so its resources can be freed. + // + // However, it's not trivial to determines what "enough work + // for all the other workers" means because: + // + // (1) We don't know how long the currently working workers + // will need to finish their work package, and + // (2) we don't know how many idle workers would be available + // because that is dynamically decided by the jobserver. + // + // TODO: Come up with a useful heuristic. + if work_items.len() <= 4 { + trans_worker_send.send(Message::TranslateItem).unwrap(); + trans_worker_state = TransWorkerState::Translating; + } else { + let item = work_items.pop().unwrap(); + let cgcx = CodegenContext { + worker: TRANS_WORKER_ID, + .. cgcx.clone() + }; + trans_worker_state = TransWorkerState::LLVMing; + spawn_work(cgcx, item); + } + } + } else { + match trans_worker_state { + TransWorkerState::Idle => { + if let Some(item) = work_items.pop() { + let cgcx = CodegenContext { + worker: TRANS_WORKER_ID, + .. cgcx.clone() + }; + + trans_worker_state = TransWorkerState::LLVMing; + spawn_work(cgcx, item); + } + } + TransWorkerState::Translating => { + bug!("trans worker should not be translating after \ + translation was already completed") + } + TransWorkerState::LLVMing => { + // Already making good use of that token + } + } + } // Spin up what work we can, only doing this while we've got available // parallelism slots and work left to spawn. - while work_items.len() > 0 && running < tokens.len() + 1 { + while work_items.len() > 0 && running < tokens.len() { let item = work_items.pop().unwrap(); let worker_id = get_worker_id(&mut free_worker_ids); @@ -1231,7 +1301,7 @@ fn start_executing_work(sess: &Session, } // Relinquish accidentally acquired extra tokens - tokens.truncate(running.saturating_sub(1)); + tokens.truncate(running); match coordinator_receive.recv().unwrap() { // Save the token locally and the next turn of the loop will use @@ -1242,15 +1312,25 @@ fn start_executing_work(sess: &Session, tokens.push(token); } else { shared_emitter.fatal("failed to acquire jobserver token"); - drop(trans_worker_send.send(Message::CheckErrorMessages)); // Exit the coordinator thread panic!() } } - Message::WorkItem(work_item) => { - work_items.push(work_item); - helper.request_token(); + Message::TranslationDone { llvm_work_item, is_last } => { + work_items.insert(0, llvm_work_item); + + if is_last { + // If this is the last, don't request a token because + // the trans worker thread will be free to handle this + // immediately. + translation_done = true; + } else { + helper.request_token(); + } + + assert_eq!(trans_worker_state, TransWorkerState::Translating); + trans_worker_state = TransWorkerState::Idle; } // If a thread exits successfully then we drop a token associated @@ -1262,10 +1342,14 @@ fn start_executing_work(sess: &Session, // Note that if the thread failed that means it panicked, so we // abort immediately. Message::Done { result: Ok(compiled_module), worker_id } => { - drop(tokens.pop()); - running -= 1; - free_worker_ids.push(worker_id); - drop(trans_worker_send.send(Message::CheckErrorMessages)); + if worker_id == TRANS_WORKER_ID { + assert_eq!(trans_worker_state, TransWorkerState::LLVMing); + trans_worker_state = TransWorkerState::Idle; + } else { + drop(tokens.pop()); + running -= 1; + free_worker_ids.push(worker_id); + } match compiled_module.kind { ModuleKind::Regular => { @@ -1283,15 +1367,11 @@ fn start_executing_work(sess: &Session, } Message::Done { result: Err(()), worker_id: _ } => { shared_emitter.fatal("aborting due to worker thread panic"); - drop(trans_worker_send.send(Message::CheckErrorMessages)); // Exit the coordinator thread panic!() } - Message::TranslationDone => { - translation_done = true; - } - msg @ Message::CheckErrorMessages => { - bug!("unexpected message: {:?}", msg); + Message::TranslateItem => { + bug!("the coordinator should not receive translation requests") } } } @@ -1316,10 +1396,6 @@ fn spawn_work(cgcx: CodegenContext, work: WorkItem) { let depth = time_depth(); thread::spawn(move || { - let _timing_guard = cgcx.time_graph - .as_ref() - .map(|tg| tg.start(time_graph::TimelineId(cgcx.worker), - LLVM_WORK_PACKAGE_KIND)); set_time_depth(depth); // Set up a destructor which will fire off a message that we're done as @@ -1362,7 +1438,13 @@ fn drop(&mut self) { // we just ignore the result and then send off our message saying that // we're done, which if `execute_work_item` failed is unlikely to be // seen by the main thread, but hey we might as well try anyway. - bomb.result = Some(execute_work_item(&cgcx, work).unwrap()); + bomb.result = { + let _timing_guard = cgcx.time_graph + .as_ref() + .map(|tg| tg.start(time_graph::TimelineId(cgcx.worker), + LLVM_WORK_PACKAGE_KIND)); + Some(execute_work_item(&cgcx, work).unwrap()) + }; }); } @@ -1578,6 +1660,7 @@ pub struct OngoingCrateTranslation { time_graph: Option, coordinator_send: Sender, + trans_worker_receive: Receiver, shared_emitter_main: SharedEmitterMain, future: thread::JoinHandle, } @@ -1645,25 +1728,49 @@ pub fn join(self, sess: &Session) -> CrateTranslation { pub fn submit_translated_module_to_llvm(&self, sess: &Session, - mtrans: ModuleTranslation) { + mtrans: ModuleTranslation, + is_last: bool) { let module_config = match mtrans.kind { ModuleKind::Regular => self.regular_module_config.clone(sess), ModuleKind::Metadata => self.metadata_module_config.clone(sess), ModuleKind::Allocator => self.allocator_module_config.clone(sess), }; - let work_item = build_work_item(mtrans, - module_config, - self.output_filenames.clone()); + let llvm_work_item = build_work_item(mtrans, + module_config, + self.output_filenames.clone()); - drop(self.coordinator_send.send(Message::WorkItem(work_item))); + drop(self.coordinator_send.send(Message::TranslationDone { + llvm_work_item, + is_last + })); } - pub fn signal_translation_done(&self) { - drop(self.coordinator_send.send(Message::TranslationDone)); + pub fn submit_pre_translated_module_to_llvm(&self, + sess: &Session, + mtrans: ModuleTranslation, + is_last: bool) { + self.wait_for_signal_to_translate_item(); + self.check_for_errors(sess); + self.submit_translated_module_to_llvm(sess, mtrans, is_last); } pub fn check_for_errors(&self, sess: &Session) { self.shared_emitter_main.check(sess, false); } + + pub fn wait_for_signal_to_translate_item(&self) { + match self.trans_worker_receive.recv() { + Ok(Message::TranslateItem) => { + // Nothing to do + } + Ok(message) => { + panic!("unexpected message: {:?}", message) + } + Err(_) => { + // One of the LLVM threads must have panicked, fall through so + // error handling can be reached. + } + } + } } diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs index 0b82ac71c33d3657d26a055625071c275cbd5496..2d1f43aff571becba6eacbd19429581d8d00af64 100644 --- a/src/librustc_trans/base.rs +++ b/src/librustc_trans/base.rs @@ -981,14 +981,15 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, linker_info, false); - ongoing_translation.submit_translated_module_to_llvm(tcx.sess, metadata_module); - ongoing_translation.signal_translation_done(); + ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess, metadata_module, true); assert_and_save_dep_graph(tcx, incremental_hashes_map, metadata_incr_hashes, link_meta); + ongoing_translation.check_for_errors(tcx.sess); + return ongoing_translation; } @@ -1032,35 +1033,87 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, linker_info, no_integrated_as); - ongoing_translation.submit_translated_module_to_llvm(tcx.sess, metadata_module); + // Translate an allocator shim, if any + // + // If LTO is enabled and we've got some previous LLVM module we translated + // above, then we can just translate directly into that LLVM module. If not, + // however, we need to create a separate module and trans into that. Note + // that the separate translation is critical for the standard library where + // the rlib's object file doesn't have allocator functions but the dylib + // links in an object file that has allocator functions. When we're + // compiling a final LTO artifact, though, there's no need to worry about + // this as we're not working with this dual "rlib/dylib" functionality. + let allocator_module = if tcx.sess.lto() { + None + } else if let Some(kind) = tcx.sess.allocator_kind.get() { + unsafe { + let (llcx, llmod) = + context::create_context_and_module(tcx.sess, "allocator"); + let modules = ModuleLlvm { + llmod: llmod, + llcx: llcx, + }; + time(tcx.sess.time_passes(), "write allocator module", || { + allocator::trans(tcx, &modules, kind) + }); + + Some(ModuleTranslation { + name: link::ALLOCATOR_MODULE_NAME.to_string(), + symbol_name_hash: 0, // we always rebuild allocator shims + source: ModuleSource::Translated(modules), + kind: ModuleKind::Allocator, + }) + } + } else { + None + }; + + if let Some(allocator_module) = allocator_module { + ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess, allocator_module, false); + } + + let codegen_unit_count = codegen_units.len(); + ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess, + metadata_module, + codegen_unit_count == 0); let translation_items = Arc::new(translation_items); let mut all_stats = Stats::default(); let mut module_dispositions = tcx.sess.opts.incremental.as_ref().map(|_| Vec::new()); - for cgu in codegen_units.into_iter() { + for (cgu_index, cgu) in codegen_units.into_iter().enumerate() { + ongoing_translation.wait_for_signal_to_translate_item(); ongoing_translation.check_for_errors(tcx.sess); - let _timing_guard = time_graph - .as_ref() - .map(|time_graph| time_graph.start(write::TRANS_WORKER_TIMELINE, - write::TRANS_WORK_PACKAGE_KIND)); - - let dep_node = cgu.work_product_dep_node(); - let ((stats, module), _) = - tcx.dep_graph.with_task(dep_node, - AssertDepGraphSafe(&shared_ccx), - AssertDepGraphSafe((cgu, - translation_items.clone(), - exported_symbols.clone())), - module_translation); - all_stats.extend(stats); - - if let Some(ref mut module_dispositions) = module_dispositions { - module_dispositions.push(module.disposition()); - } - ongoing_translation.submit_translated_module_to_llvm(tcx.sess, module); + let module = { + let _timing_guard = time_graph + .as_ref() + .map(|time_graph| time_graph.start(write::TRANS_WORKER_TIMELINE, + write::TRANS_WORK_PACKAGE_KIND)); + let dep_node = cgu.work_product_dep_node(); + let ((stats, module), _) = + tcx.dep_graph.with_task(dep_node, + AssertDepGraphSafe(&shared_ccx), + AssertDepGraphSafe((cgu, + translation_items.clone(), + exported_symbols.clone())), + module_translation); + all_stats.extend(stats); + + if let Some(ref mut module_dispositions) = module_dispositions { + module_dispositions.push(module.disposition()); + } + + module + }; + + let is_last_cgu = (cgu_index + 1) == codegen_unit_count; + + ongoing_translation.submit_translated_module_to_llvm(tcx.sess, + module, + is_last_cgu); + ongoing_translation.check_for_errors(tcx.sess); } if let Some(module_dispositions) = module_dispositions { @@ -1229,47 +1282,7 @@ fn module_translation<'a, 'tcx>( } } - // Translate an allocator shim, if any - // - // If LTO is enabled and we've got some previous LLVM module we translated - // above, then we can just translate directly into that LLVM module. If not, - // however, we need to create a separate module and trans into that. Note - // that the separate translation is critical for the standard library where - // the rlib's object file doesn't have allocator functions but the dylib - // links in an object file that has allocator functions. When we're - // compiling a final LTO artifact, though, there's no need to worry about - // this as we're not working with this dual "rlib/dylib" functionality. - let allocator_module = if tcx.sess.lto() { - None - } else if let Some(kind) = tcx.sess.allocator_kind.get() { - unsafe { - let (llcx, llmod) = - context::create_context_and_module(tcx.sess, "allocator"); - let modules = ModuleLlvm { - llmod: llmod, - llcx: llcx, - }; - time(tcx.sess.time_passes(), "write allocator module", || { - allocator::trans(tcx, &modules, kind) - }); - - Some(ModuleTranslation { - name: link::ALLOCATOR_MODULE_NAME.to_string(), - symbol_name_hash: 0, // we always rebuild allocator shims - source: ModuleSource::Translated(modules), - kind: ModuleKind::Allocator, - }) - } - } else { - None - }; - - if let Some(allocator_module) = allocator_module { - ongoing_translation.submit_translated_module_to_llvm(tcx.sess, allocator_module); - } - ongoing_translation.check_for_errors(tcx.sess); - ongoing_translation.signal_translation_done(); assert_and_save_dep_graph(tcx, incremental_hashes_map,