async-llvm(23): Let the main thread also do LLVM work in order to reduce memory pressure.

f5acc392 · Michael Woerister · 88192785 · f5acc392 · f5acc392
隐藏空白更改
内联并排

Showing with 218 addition and 98 deletion

src/librustc_trans/back/write.rs src/librustc_trans/back/write.rs +142 -35

src/librustc_trans/base.rs src/librustc_trans/base.rs +76 -63

未找到文件。
--- a/src/librustc_trans/back/write.rs
+++ b/src/librustc_trans/back/write.rs
@@ -764,7 +764,7 @@ pub fn start_async_translation(sess: &Session,
    });

    let (shared_emitter, shared_emitter_main) = SharedEmitter::new();
-    let (trans_worker_send, _trans_worker_receive) = channel();
+    let (trans_worker_send, trans_worker_receive) = channel();
    let (coordinator_send, coordinator_receive) = channel();

    let coordinator_thread = start_executing_work(sess,
@@ -792,6 +792,7 @@ pub fn start_async_translation(sess: &Session,
        time_graph,
        output_filenames: crate_output.clone(),
        coordinator_send,
+        trans_worker_receive,
        shared_emitter_main,
        future: coordinator_thread
    }
@@ -987,7 +988,7 @@ pub fn dump_incremental_data(trans: &CrateTranslation) {
    eprintln!("incremental: re-using {} out of {} modules", reuse, trans.modules.len());
 }

-pub struct WorkItem {
+struct WorkItem {
    mtrans: ModuleTranslation,
    config: ModuleConfig,
    output_names: OutputFilenames
@@ -1074,9 +1075,11 @@ enum Message {
        result: Result<CompiledModule, ()>,
        worker_id: usize,
    },
-    WorkItem(WorkItem),
-    CheckErrorMessages,
-    TranslationDone,
+    TranslationDone {
+        llvm_work_item: WorkItem,
+        is_last: bool
+    },
+    TranslateItem,
 }

 struct Diagnostic {
@@ -1085,6 +1088,13 @@ struct Diagnostic {
    lvl: Level,
 }

+#[derive(PartialEq, Clone, Copy, Debug)]
+enum TransWorkerState {
+    Idle,
+    Translating,
+    LLVMing,
+}
+
 fn start_executing_work(sess: &Session,
                        shared_emitter: SharedEmitter,
                        trans_worker_send: Sender<Message>,
@@ -1189,7 +1199,6 @@ fn start_executing_work(sess: &Session,
    // Before that work finishes, however, we may acquire a token. In that case
    // we actually wastefully acquired the token, so we relinquish it back to
    // the jobserver.
-
    thread::spawn(move || {
        let mut worker_id_counter = 0;
        let mut free_worker_ids = Vec::new();
@@ -1211,13 +1220,74 @@ fn start_executing_work(sess: &Session,
        let mut work_items = Vec::new();
        let mut tokens = Vec::new();

+        let mut trans_worker_state = TransWorkerState::Idle;
        let mut running = 0;

-        while !translation_done || work_items.len() > 0 || running > 0 {
+        while !translation_done ||
+              work_items.len() > 0 ||
+              running > 0 ||
+              trans_worker_state != TransWorkerState::Idle {
+
+            if !translation_done {
+                if trans_worker_state == TransWorkerState::Idle {
+                    // Translation is not done yet, so there are two things the
+                    // translation worker could do:
+                    //
+                    // (1) Translate another CGU
+                    // (2) Run an already translated CGU through LLVM
+                    //
+                    // Option (2) makes sense if there's already enough work for
+                    // all the other workers. In that case it's better to run
+                    // a CGU through LLVM, so its resources can be freed.
+                    //
+                    // However, it's not trivial to determines what "enough work
+                    // for all the other workers" means because:
+                    //
+                    // (1) We don't know how long the currently working workers
+                    //     will need to finish their work package, and
+                    // (2) we don't know how many idle workers would be available
+                    //     because that is dynamically decided by the jobserver.
+                    //
+                    // TODO: Come up with a useful heuristic.
+                    if work_items.len() <= 4 {
+                        trans_worker_send.send(Message::TranslateItem).unwrap();
+                        trans_worker_state = TransWorkerState::Translating;
+                    } else {
+                        let item = work_items.pop().unwrap();
+                        let cgcx = CodegenContext {
+                            worker: TRANS_WORKER_ID,
+                            .. cgcx.clone()
+                        };
+                        trans_worker_state = TransWorkerState::LLVMing;
+                        spawn_work(cgcx, item);
+                    }
+                }
+            } else {
+                match trans_worker_state {
+                    TransWorkerState::Idle => {
+                        if let Some(item) = work_items.pop() {
+                            let cgcx = CodegenContext {
+                                worker: TRANS_WORKER_ID,
+                                .. cgcx.clone()
+                            };
+
+                            trans_worker_state = TransWorkerState::LLVMing;
+                            spawn_work(cgcx, item);
+                        }
+                    }
+                    TransWorkerState::Translating => {
+                        bug!("trans worker should not be translating after \
+                              translation was already completed")
+                    }
+                    TransWorkerState::LLVMing => {
+                        // Already making good use of that token
+                    }
+                }
+            }

            // Spin up what work we can, only doing this while we've got available
            // parallelism slots and work left to spawn.
-            while work_items.len() > 0 && running < tokens.len() + 1 {
+            while work_items.len() > 0 && running < tokens.len() {
                let item = work_items.pop().unwrap();
                let worker_id = get_worker_id(&mut free_worker_ids);

@@ -1231,7 +1301,7 @@ fn start_executing_work(sess: &Session,
            }

            // Relinquish accidentally acquired extra tokens
-            tokens.truncate(running.saturating_sub(1));
+            tokens.truncate(running);

            match coordinator_receive.recv().unwrap() {
                // Save the token locally and the next turn of the loop will use
@@ -1242,15 +1312,25 @@ fn start_executing_work(sess: &Session,
                        tokens.push(token);
                    } else {
                        shared_emitter.fatal("failed to acquire jobserver token");
-                        drop(trans_worker_send.send(Message::CheckErrorMessages));
                        // Exit the coordinator thread
                        panic!()
                    }
                }

-                Message::WorkItem(work_item) => {
-                    work_items.push(work_item);
-                    helper.request_token();
+                Message::TranslationDone { llvm_work_item, is_last } => {
+                    work_items.insert(0, llvm_work_item);
+
+                    if is_last {
+                        // If this is the last, don't request a token because
+                        // the trans worker thread will be free to handle this
+                        // immediately.
+                        translation_done = true;
+                    } else {
+                        helper.request_token();
+                    }
+
+                    assert_eq!(trans_worker_state, TransWorkerState::Translating);
+                    trans_worker_state = TransWorkerState::Idle;
                }

                // If a thread exits successfully then we drop a token associated
@@ -1262,10 +1342,14 @@ fn start_executing_work(sess: &Session,
                // Note that if the thread failed that means it panicked, so we
                // abort immediately.
                Message::Done { result: Ok(compiled_module), worker_id } => {
-                    drop(tokens.pop());
-                    running -= 1;
-                    free_worker_ids.push(worker_id);
-                    drop(trans_worker_send.send(Message::CheckErrorMessages));
+                    if worker_id == TRANS_WORKER_ID {
+                        assert_eq!(trans_worker_state, TransWorkerState::LLVMing);
+                        trans_worker_state = TransWorkerState::Idle;
+                    } else {
+                        drop(tokens.pop());
+                        running -= 1;
+                        free_worker_ids.push(worker_id);
+                    }

                    match compiled_module.kind {
                        ModuleKind::Regular => {
@@ -1283,15 +1367,11 @@ fn start_executing_work(sess: &Session,
                }
                Message::Done { result: Err(()), worker_id: _ } => {
                    shared_emitter.fatal("aborting due to worker thread panic");
-                    drop(trans_worker_send.send(Message::CheckErrorMessages));
                    // Exit the coordinator thread
                    panic!()
                }
-                Message::TranslationDone => {
-                    translation_done = true;
-                }
-                msg @ Message::CheckErrorMessages => {
-                    bug!("unexpected message: {:?}", msg);
+                Message::TranslateItem => {
+                    bug!("the coordinator should not receive translation requests")
                }
            }
        }
@@ -1316,10 +1396,6 @@ fn spawn_work(cgcx: CodegenContext, work: WorkItem) {
    let depth = time_depth();

    thread::spawn(move || {
-        let _timing_guard = cgcx.time_graph
-                                .as_ref()
-                                .map(|tg| tg.start(time_graph::TimelineId(cgcx.worker),
-                                                   LLVM_WORK_PACKAGE_KIND));
        set_time_depth(depth);

        // Set up a destructor which will fire off a message that we're done as
@@ -1362,7 +1438,13 @@ fn drop(&mut self) {
        // we just ignore the result and then send off our message saying that
        // we're done, which if `execute_work_item` failed is unlikely to be
        // seen by the main thread, but hey we might as well try anyway.
-        bomb.result = Some(execute_work_item(&cgcx, work).unwrap());
+        bomb.result = {
+            let _timing_guard = cgcx.time_graph
+                                .as_ref()
+                                .map(|tg| tg.start(time_graph::TimelineId(cgcx.worker),
+                                                   LLVM_WORK_PACKAGE_KIND));
+            Some(execute_work_item(&cgcx, work).unwrap())
+        };
    });
 }

@@ -1578,6 +1660,7 @@ pub struct OngoingCrateTranslation {

    time_graph: Option<TimeGraph>,
    coordinator_send: Sender<Message>,
+    trans_worker_receive: Receiver<Message>,
    shared_emitter_main: SharedEmitterMain,
    future: thread::JoinHandle<CompiledModules>,
 }
@@ -1645,25 +1728,49 @@ pub fn join(self, sess: &Session) -> CrateTranslation {

    pub fn submit_translated_module_to_llvm(&self,
                                            sess: &Session,
-                                            mtrans: ModuleTranslation) {
+                                            mtrans: ModuleTranslation,
+                                            is_last: bool) {
        let module_config = match mtrans.kind {
            ModuleKind::Regular => self.regular_module_config.clone(sess),
            ModuleKind::Metadata => self.metadata_module_config.clone(sess),
            ModuleKind::Allocator => self.allocator_module_config.clone(sess),
        };

-        let work_item = build_work_item(mtrans,
-                                        module_config,
-                                        self.output_filenames.clone());
+        let llvm_work_item = build_work_item(mtrans,
+                                             module_config,
+                                             self.output_filenames.clone());

-        drop(self.coordinator_send.send(Message::WorkItem(work_item)));
+        drop(self.coordinator_send.send(Message::TranslationDone {
+            llvm_work_item,
+            is_last
+        }));
    }

-    pub fn signal_translation_done(&self) {
-        drop(self.coordinator_send.send(Message::TranslationDone));
+    pub fn submit_pre_translated_module_to_llvm(&self,
+                                                sess: &Session,
+                                                mtrans: ModuleTranslation,
+                                                is_last: bool) {
+        self.wait_for_signal_to_translate_item();
+        self.check_for_errors(sess);
+        self.submit_translated_module_to_llvm(sess, mtrans, is_last);
    }

    pub fn check_for_errors(&self, sess: &Session) {
        self.shared_emitter_main.check(sess, false);
    }
+
+    pub fn wait_for_signal_to_translate_item(&self) {
+        match self.trans_worker_receive.recv() {
+            Ok(Message::TranslateItem) => {
+                // Nothing to do
+            }
+            Ok(message) => {
+                panic!("unexpected message: {:?}", message)
+            }
+            Err(_) => {
+                // One of the LLVM threads must have panicked, fall through so
+                // error handling can be reached.
+            }
+        }
+    }
 }
--- a/src/librustc_trans/base.rs
+++ b/src/librustc_trans/base.rs
@@ -981,14 +981,15 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
            linker_info,
            false);

-        ongoing_translation.submit_translated_module_to_llvm(tcx.sess, metadata_module);
-        ongoing_translation.signal_translation_done();
+        ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess, metadata_module, true);

        assert_and_save_dep_graph(tcx,
                                  incremental_hashes_map,
                                  metadata_incr_hashes,
                                  link_meta);

+        ongoing_translation.check_for_errors(tcx.sess);
+
        return ongoing_translation;
    }

@@ -1032,35 +1033,87 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
        linker_info,
        no_integrated_as);

-    ongoing_translation.submit_translated_module_to_llvm(tcx.sess, metadata_module);
+    // Translate an allocator shim, if any
+    //
+    // If LTO is enabled and we've got some previous LLVM module we translated
+    // above, then we can just translate directly into that LLVM module. If not,
+    // however, we need to create a separate module and trans into that. Note
+    // that the separate translation is critical for the standard library where
+    // the rlib's object file doesn't have allocator functions but the dylib
+    // links in an object file that has allocator functions. When we're
+    // compiling a final LTO artifact, though, there's no need to worry about
+    // this as we're not working with this dual "rlib/dylib" functionality.
+    let allocator_module = if tcx.sess.lto() {
+        None
+    } else if let Some(kind) = tcx.sess.allocator_kind.get() {
+        unsafe {
+            let (llcx, llmod) =
+                context::create_context_and_module(tcx.sess, "allocator");
+            let modules = ModuleLlvm {
+                llmod: llmod,
+                llcx: llcx,
+            };
+            time(tcx.sess.time_passes(), "write allocator module", || {
+                allocator::trans(tcx, &modules, kind)
+            });
+
+            Some(ModuleTranslation {
+                name: link::ALLOCATOR_MODULE_NAME.to_string(),
+                symbol_name_hash: 0, // we always rebuild allocator shims
+                source: ModuleSource::Translated(modules),
+                kind: ModuleKind::Allocator,
+            })
+        }
+    } else {
+        None
+    };
+
+    if let Some(allocator_module) = allocator_module {
+        ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess, allocator_module, false);
+    }
+
+    let codegen_unit_count = codegen_units.len();
+    ongoing_translation.submit_pre_translated_module_to_llvm(tcx.sess,
+                                                             metadata_module,
+                                                             codegen_unit_count == 0);

    let translation_items = Arc::new(translation_items);

    let mut all_stats = Stats::default();
    let mut module_dispositions = tcx.sess.opts.incremental.as_ref().map(|_| Vec::new());

-    for cgu in codegen_units.into_iter() {
+    for (cgu_index, cgu) in codegen_units.into_iter().enumerate() {
+        ongoing_translation.wait_for_signal_to_translate_item();
        ongoing_translation.check_for_errors(tcx.sess);

-        let _timing_guard = time_graph
-            .as_ref()
-            .map(|time_graph| time_graph.start(write::TRANS_WORKER_TIMELINE,
-                                               write::TRANS_WORK_PACKAGE_KIND));
-
-        let dep_node = cgu.work_product_dep_node();
-        let ((stats, module), _) =
-            tcx.dep_graph.with_task(dep_node,
-                                    AssertDepGraphSafe(&shared_ccx),
-                                    AssertDepGraphSafe((cgu,
-                                                        translation_items.clone(),
-                                                        exported_symbols.clone())),
-                                    module_translation);
-        all_stats.extend(stats);
-
-        if let Some(ref mut module_dispositions) = module_dispositions {
-            module_dispositions.push(module.disposition());
-        }
-        ongoing_translation.submit_translated_module_to_llvm(tcx.sess, module);
+        let module = {
+            let _timing_guard = time_graph
+                .as_ref()
+                .map(|time_graph| time_graph.start(write::TRANS_WORKER_TIMELINE,
+                                                   write::TRANS_WORK_PACKAGE_KIND));
+            let dep_node = cgu.work_product_dep_node();
+            let ((stats, module), _) =
+                tcx.dep_graph.with_task(dep_node,
+                                        AssertDepGraphSafe(&shared_ccx),
+                                        AssertDepGraphSafe((cgu,
+                                                            translation_items.clone(),
+                                                            exported_symbols.clone())),
+                                        module_translation);
+            all_stats.extend(stats);
+
+            if let Some(ref mut module_dispositions) = module_dispositions {
+                module_dispositions.push(module.disposition());
+            }
+
+            module
+        };
+
+        let is_last_cgu = (cgu_index + 1) == codegen_unit_count;
+
+        ongoing_translation.submit_translated_module_to_llvm(tcx.sess,
+                                                             module,
+                                                             is_last_cgu);
+        ongoing_translation.check_for_errors(tcx.sess);
    }

    if let Some(module_dispositions) = module_dispositions {
@@ -1229,47 +1282,7 @@ fn module_translation<'a, 'tcx>(
        }
    }

-    // Translate an allocator shim, if any
-    //
-    // If LTO is enabled and we've got some previous LLVM module we translated
-    // above, then we can just translate directly into that LLVM module. If not,
-    // however, we need to create a separate module and trans into that. Note
-    // that the separate translation is critical for the standard library where
-    // the rlib's object file doesn't have allocator functions but the dylib
-    // links in an object file that has allocator functions. When we're
-    // compiling a final LTO artifact, though, there's no need to worry about
-    // this as we're not working with this dual "rlib/dylib" functionality.
-    let allocator_module = if tcx.sess.lto() {
-        None
-    } else if let Some(kind) = tcx.sess.allocator_kind.get() {
-        unsafe {
-            let (llcx, llmod) =
-                context::create_context_and_module(tcx.sess, "allocator");
-            let modules = ModuleLlvm {
-                llmod: llmod,
-                llcx: llcx,
-            };
-            time(tcx.sess.time_passes(), "write allocator module", || {
-                allocator::trans(tcx, &modules, kind)
-            });
-
-            Some(ModuleTranslation {
-                name: link::ALLOCATOR_MODULE_NAME.to_string(),
-                symbol_name_hash: 0, // we always rebuild allocator shims
-                source: ModuleSource::Translated(modules),
-                kind: ModuleKind::Allocator,
-            })
-        }
-    } else {
-        None
-    };
-
-    if let Some(allocator_module) = allocator_module {
-        ongoing_translation.submit_translated_module_to_llvm(tcx.sess, allocator_module);
-    }
-
    ongoing_translation.check_for_errors(tcx.sess);
-    ongoing_translation.signal_translation_done();

    assert_and_save_dep_graph(tcx,
                              incremental_hashes_map,