提交 4b2bdf7b 编写于 作者: A Alex Crichton

rustc: Don't inline in CGUs at -O0

This commit tweaks the behavior of inlining functions into multiple codegen
units when rustc is compiling in debug mode. Today rustc will unconditionally
treat `#[inline]` functions by translating them into all codegen units that
they're needed within, marking the linkage as `internal`. This commit changes
the behavior so that in debug mode (compiling at `-O0`) rustc will instead only
translate `#[inline]` functions into *one* codegen unit, forcing all other
codegen units to reference this one copy.

The goal here is to improve debug compile times by reducing the amount of
translation that happens on behalf of multiple codegen units. It was discovered
in #44941 that increasing the number of codegen units had the adverse side
effect of increasing the overal work done by the compiler, and the suspicion
here was that the compiler was inlining, translating, and codegen'ing more
functions with more codegen units (for example `String` would be basically
inlined into all codegen units if used). The strategy in this commit should
reduce the cost of `#[inline]` functions to being equivalent to one codegen
unit, which is only translating and codegen'ing inline functions once.

Collected [data] shows that this does indeed improve the situation from [before]
as the overall cpu-clock time increases at a much slower rate and when pinned to
one core rustc does not consume significantly more wall clock time than with one
codegen unit.

One caveat of this commit is that the symbol names for inlined functions that
are only translated once needed some slight tweaking. These inline functions
could be translated into multiple crates and we need to make sure the symbols
don't collideA so the crate name/disambiguator is mixed in to the symbol name
hash in these situations.

[data]: https://github.com/rust-lang/rust/issues/44941#issuecomment-334880911
[before]: https://github.com/rust-lang/rust/issues/44941#issuecomment-334583384
上级 ac76206b
......@@ -1097,6 +1097,8 @@ fn parse_optimization_fuel(slot: &mut Option<(String, u64)>, v: Option<&str>) ->
"generate a graphical HTML report of time spent in trans and LLVM"),
thinlto: bool = (false, parse_bool, [TRACKED],
"enable ThinLTO when possible"),
inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
"control whether #[inline] functions are in all cgus"),
}
pub fn default_lib_output() -> CrateType {
......
......@@ -98,8 +98,10 @@
//! DefPaths which are much more robust in the face of changes to the code base.
use monomorphize::Instance;
use trans_item::{TransItemExt, InstantiationMode};
use rustc::middle::weak_lang_items;
use rustc::middle::trans::TransItem;
use rustc::hir::def_id::DefId;
use rustc::hir::map as hir_map;
use rustc::ty::{self, Ty, TyCtxt, TypeFoldable};
......@@ -150,7 +152,10 @@ pub fn provide(providers: &mut Providers) {
fn get_symbol_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// the DefId of the item this name is for
def_id: Option<DefId>,
def_id: DefId,
// instance this name will be for
instance: Instance<'tcx>,
// type of the item, without any generic
// parameters substituted; this is
......@@ -160,7 +165,7 @@ fn get_symbol_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// values for generic type parameters,
// if any.
substs: Option<&'tcx Substs<'tcx>>)
substs: &'tcx Substs<'tcx>)
-> u64 {
debug!("get_symbol_hash(def_id={:?}, parameters={:?})", def_id, substs);
......@@ -170,7 +175,7 @@ fn get_symbol_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// the main symbol name is not necessarily unique; hash in the
// compiler's internal def-path, guaranteeing each symbol has a
// truly unique path
hasher.hash(def_id.map(|def_id| tcx.def_path_hash(def_id)));
hasher.hash(tcx.def_path_hash(def_id));
// Include the main item-type. Note that, in this case, the
// assertions about `needs_subst` may not hold, but this item-type
......@@ -186,19 +191,36 @@ fn get_symbol_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
}
// also include any type parameters (for generic items)
if let Some(substs) = substs {
assert!(!substs.has_erasable_regions());
assert!(!substs.needs_subst());
substs.visit_with(&mut hasher);
// If this is an instance of a generic function, we also hash in
// the ID of the instantiating crate. This avoids symbol conflicts
// in case the same instances is emitted in two crates of the same
// project.
if substs.types().next().is_some() {
hasher.hash(tcx.crate_name.as_str());
hasher.hash(tcx.sess.local_crate_disambiguator().as_str());
assert!(!substs.has_erasable_regions());
assert!(!substs.needs_subst());
substs.visit_with(&mut hasher);
let mut avoid_cross_crate_conflicts = false;
// If this is an instance of a generic function, we also hash in
// the ID of the instantiating crate. This avoids symbol conflicts
// in case the same instances is emitted in two crates of the same
// project.
if substs.types().next().is_some() {
avoid_cross_crate_conflicts = true;
}
// If we're dealing with an instance of a function that's inlined from
// another crate but we're marking it as globally shared to our
// compliation (aka we're not making an internal copy in each of our
// codegen units) then this symbol may become an exported (but hidden
// visibility) symbol. This means that multiple crates may do the same
// and we want to be sure to avoid any symbol conflicts here.
match TransItem::Fn(instance).instantiation_mode(tcx) {
InstantiationMode::GloballyShared { may_conflict: true } => {
avoid_cross_crate_conflicts = true;
}
_ => {}
}
if avoid_cross_crate_conflicts {
hasher.hash(tcx.crate_name.as_str());
hasher.hash(tcx.sess.local_crate_disambiguator().as_str());
}
});
......@@ -309,7 +331,7 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
// and should not matter anyhow.
let instance_ty = tcx.erase_regions(&instance_ty);
let hash = get_symbol_hash(tcx, Some(def_id), instance_ty, Some(substs));
let hash = get_symbol_hash(tcx, def_id, instance, instance_ty, substs);
SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id)).finish(hash)
}
......
......@@ -401,9 +401,9 @@ fn collect_items_rec<'a, 'tcx: 'a>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
}
fn record_accesses<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
caller: TransItem<'tcx>,
callees: &[TransItem<'tcx>],
inlining_map: &mut InliningMap<'tcx>) {
caller: TransItem<'tcx>,
callees: &[TransItem<'tcx>],
inlining_map: &mut InliningMap<'tcx>) {
let is_inlining_candidate = |trans_item: &TransItem<'tcx>| {
trans_item.instantiation_mode(tcx) == InstantiationMode::LocalCopy
};
......
......@@ -279,75 +279,74 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
let mut internalization_candidates = FxHashSet();
for trans_item in trans_items {
let is_root = trans_item.instantiation_mode(tcx) == InstantiationMode::GloballyShared;
match trans_item.instantiation_mode(tcx) {
InstantiationMode::GloballyShared { .. } => {}
InstantiationMode::LocalCopy => continue,
}
if is_root {
let characteristic_def_id = characteristic_def_id_of_trans_item(tcx, trans_item);
let is_volatile = is_incremental_build &&
trans_item.is_generic_fn();
let characteristic_def_id = characteristic_def_id_of_trans_item(tcx, trans_item);
let is_volatile = is_incremental_build &&
trans_item.is_generic_fn();
let codegen_unit_name = match characteristic_def_id {
Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile),
None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(),
};
let codegen_unit_name = match characteristic_def_id {
Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile),
None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(),
};
let make_codegen_unit = || {
CodegenUnit::new(codegen_unit_name.clone())
};
let make_codegen_unit = || {
CodegenUnit::new(codegen_unit_name.clone())
};
let codegen_unit = codegen_units.entry(codegen_unit_name.clone())
.or_insert_with(make_codegen_unit);
let (linkage, visibility) = match trans_item.explicit_linkage(tcx) {
Some(explicit_linkage) => (explicit_linkage, Visibility::Default),
None => {
match trans_item {
TransItem::Fn(ref instance) => {
let visibility = match instance.def {
InstanceDef::Item(def_id) => {
if def_id.is_local() {
if tcx.is_exported_symbol(def_id) {
Visibility::Default
} else {
internalization_candidates.insert(trans_item);
Visibility::Hidden
}
let codegen_unit = codegen_units.entry(codegen_unit_name.clone())
.or_insert_with(make_codegen_unit);
let (linkage, visibility) = match trans_item.explicit_linkage(tcx) {
Some(explicit_linkage) => (explicit_linkage, Visibility::Default),
None => {
match trans_item {
TransItem::Fn(ref instance) => {
let visibility = match instance.def {
InstanceDef::Item(def_id) => {
if def_id.is_local() {
if tcx.is_exported_symbol(def_id) {
Visibility::Default
} else {
internalization_candidates.insert(trans_item);
Visibility::Hidden
}
} else {
Visibility::Hidden
}
InstanceDef::FnPtrShim(..) |
InstanceDef::Virtual(..) |
InstanceDef::Intrinsic(..) |
InstanceDef::ClosureOnceShim { .. } |
InstanceDef::DropGlue(..) |
InstanceDef::CloneShim(..) => {
bug!("partitioning: Encountered unexpected
root translation item: {:?}",
trans_item)
}
};
(Linkage::External, visibility)
}
TransItem::Static(node_id) |
TransItem::GlobalAsm(node_id) => {
let def_id = tcx.hir.local_def_id(node_id);
let visibility = if tcx.is_exported_symbol(def_id) {
Visibility::Default
} else {
internalization_candidates.insert(trans_item);
}
InstanceDef::FnPtrShim(..) |
InstanceDef::Virtual(..) |
InstanceDef::Intrinsic(..) |
InstanceDef::ClosureOnceShim { .. } |
InstanceDef::DropGlue(..) |
InstanceDef::CloneShim(..) => {
Visibility::Hidden
};
(Linkage::External, visibility)
}
}
};
(Linkage::External, visibility)
}
TransItem::Static(node_id) |
TransItem::GlobalAsm(node_id) => {
let def_id = tcx.hir.local_def_id(node_id);
let visibility = if tcx.is_exported_symbol(def_id) {
Visibility::Default
} else {
Visibility::Hidden
};
(Linkage::External, visibility)
}
}
};
codegen_unit.items_mut().insert(trans_item, (linkage, visibility));
roots.insert(trans_item);
}
};
if visibility == Visibility::Hidden {
internalization_candidates.insert(trans_item);
}
codegen_unit.items_mut().insert(trans_item, (linkage, visibility));
roots.insert(trans_item);
}
// always ensure we have at least one CGU; otherwise, if we have a
......
......@@ -26,6 +26,7 @@
use rustc::hir;
use rustc::hir::def_id::DefId;
use rustc::middle::trans::{Linkage, Visibility};
use rustc::session::config::OptLevel;
use rustc::traits;
use rustc::ty::{self, Ty, TyCtxt, TypeFoldable};
use rustc::ty::subst::{Subst, Substs};
......@@ -44,7 +45,20 @@
pub enum InstantiationMode {
/// There will be exactly one instance of the given TransItem. It will have
/// external linkage so that it can be linked to from other codegen units.
GloballyShared,
GloballyShared {
/// In some compilation scenarios we may decide to take functions that
/// are typically `LocalCopy` and instead move them to `GloballyShared`
/// to avoid translating them a bunch of times. In this situation,
/// however, our local copy may conflict with other crates also
/// inlining the same function.
///
/// This flag indicates that this situation is occuring, and informs
/// symbol name calculation that some extra mangling is needed to
/// avoid conflicts. Note that this may eventually go away entirely if
/// ThinLTO enables us to *always* have a globally shared instance of a
/// function within one crate's compilation.
may_conflict: bool,
},
/// Each codegen unit containing a reference to the given TransItem will
/// have its own private copy of the function (with internal linkage).
......@@ -154,18 +168,31 @@ fn local_span(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Option<Span> {
fn instantiation_mode(&self,
tcx: TyCtxt<'a, 'tcx, 'tcx>)
-> InstantiationMode {
let inline_in_all_cgus =
tcx.sess.opts.debugging_opts.inline_in_all_cgus.unwrap_or_else(|| {
tcx.sess.opts.optimize != OptLevel::No
});
match *self.as_trans_item() {
TransItem::Fn(ref instance) => {
if self.explicit_linkage(tcx).is_none() &&
common::requests_inline(tcx, instance)
{
InstantiationMode::LocalCopy
if inline_in_all_cgus {
InstantiationMode::LocalCopy
} else {
InstantiationMode::GloballyShared { may_conflict: true }
}
} else {
InstantiationMode::GloballyShared
InstantiationMode::GloballyShared { may_conflict: false }
}
}
TransItem::Static(..) => InstantiationMode::GloballyShared,
TransItem::GlobalAsm(..) => InstantiationMode::GloballyShared,
TransItem::Static(..) => {
InstantiationMode::GloballyShared { may_conflict: false }
}
TransItem::GlobalAsm(..) => {
InstantiationMode::GloballyShared { may_conflict: false }
}
}
}
......
......@@ -10,6 +10,7 @@
// ignore-tidy-linelength
// compile-flags:-Zprint-trans-items=eager
// compile-flags:-Zinline-in-all-cgus
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<drop_in_place_intrinsic::StructWithDtor[0]> @@ drop_in_place_intrinsic0[Internal]
struct StructWithDtor(u32);
......
......@@ -10,6 +10,7 @@
// ignore-tidy-linelength
// compile-flags:-Zprint-trans-items=eager
// compile-flags:-Zinline-in-all-cgus
#![deny(dead_code)]
......
......@@ -10,6 +10,7 @@
// ignore-tidy-linelength
// compile-flags:-Zprint-trans-items=eager
// compile-flags:-Zinline-in-all-cgus
#![deny(dead_code)]
......
......@@ -10,6 +10,7 @@
// ignore-tidy-linelength
// compile-flags:-Zprint-trans-items=eager
// compile-flags:-Zinline-in-all-cgus
#![deny(dead_code)]
......
......@@ -10,6 +10,7 @@
// ignore-tidy-linelength
// compile-flags:-Zprint-trans-items=eager
// compile-flags:-Zinline-in-all-cgus
#![deny(dead_code)]
......
......@@ -10,6 +10,7 @@
// ignore-tidy-linelength
// compile-flags:-Zprint-trans-items=eager
// compile-flags:-Zinline-in-all-cgus
#![deny(dead_code)]
......
......@@ -10,6 +10,7 @@
// ignore-tidy-linelength
// compile-flags:-Zprint-trans-items=eager
// compile-flags:-Zinline-in-all-cgus
#![deny(dead_code)]
#![feature(coerce_unsized)]
......
......@@ -13,6 +13,7 @@
// We specify -Z incremental here because we want to test the partitioning for
// incremental compilation
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/extern-drop-glue
// compile-flags:-Zinline-in-all-cgus
#![allow(dead_code)]
#![crate_type="lib"]
......
......@@ -12,6 +12,7 @@
// We specify -Z incremental here because we want to test the partitioning for
// incremental compilation
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/inlining-from-extern-crate
// compile-flags:-Zinline-in-all-cgus
#![crate_type="lib"]
......
......@@ -12,6 +12,7 @@
// We specify -Z incremental here because we want to test the partitioning for
// incremental compilation
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/local-drop-glue
// compile-flags:-Zinline-in-all-cgus
#![allow(dead_code)]
#![crate_type="lib"]
......
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-tidy-linelength
// We specify -Z incremental here because we want to test the partitioning for
// incremental compilation
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/local-inlining-but-not-all
// compile-flags:-Zinline-in-all-cgus=no
#![allow(dead_code)]
#![crate_type="lib"]
mod inline {
//~ TRANS_ITEM fn local_inlining_but_not_all::inline[0]::inlined_function[0] @@ local_inlining_but_not_all-inline[External]
#[inline(always)]
pub fn inlined_function()
{
}
}
mod user1 {
use super::inline;
//~ TRANS_ITEM fn local_inlining_but_not_all::user1[0]::foo[0] @@ local_inlining_but_not_all-user1[Internal]
fn foo() {
inline::inlined_function();
}
}
mod user2 {
use super::inline;
//~ TRANS_ITEM fn local_inlining_but_not_all::user2[0]::bar[0] @@ local_inlining_but_not_all-user2[Internal]
fn bar() {
inline::inlined_function();
}
}
mod non_user {
//~ TRANS_ITEM fn local_inlining_but_not_all::non_user[0]::baz[0] @@ local_inlining_but_not_all-non_user[Internal]
fn baz() {
}
}
......@@ -12,6 +12,7 @@
// We specify -Z incremental here because we want to test the partitioning for
// incremental compilation
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/local-inlining
// compile-flags:-Zinline-in-all-cgus
#![allow(dead_code)]
#![crate_type="lib"]
......
......@@ -12,6 +12,7 @@
// We specify -Z incremental here because we want to test the partitioning for
// incremental compilation
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/local-transitive-inlining
// compile-flags:-Zinline-in-all-cgus
#![allow(dead_code)]
#![crate_type="lib"]
......
......@@ -13,6 +13,7 @@
// We specify -Z incremental here because we want to test the partitioning for
// incremental compilation
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/vtable-through-const
// compile-flags:-Zinline-in-all-cgus
// This test case makes sure, that references made through constants are
// recorded properly in the InliningMap.
......
......@@ -5,5 +5,6 @@
all:
$(RUSTC) cci_lib.rs
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3 \
-Z inline-in-all-cgus
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ .*cci_fn)" -eq "2" ]
......@@ -7,7 +7,8 @@
# in only one compilation unit.
all:
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3 \
-Z inline-in-all-cgus
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ i32\ .*inlined)" -eq "0" ]
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ]
[ "$$(cat "$(TMPDIR)"/foo.*.ll | grep -c define\ hidden\ i32\ .*normal)" -eq "1" ]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册