提交 5187763c 编写于 作者: A Alex Crichton

rustc: Allow target-specific default cgus

Some targets, like msp430 and nvptx, don't work with multiple codegen units
right now, either because of bugs or for fundamental reasons. To account for
this, allow targets to express a default number of codegen units.

Closes #45000
上级 692b94ae
......@@ -352,7 +352,7 @@ pub struct Options {
actually_rustdoc: bool [TRACKED],
// Number of object files/codegen units to produce on the backend
codegen_units: usize [UNTRACKED],
cli_forced_codegen_units: Option<usize> [UNTRACKED],
}
);
......@@ -505,7 +505,7 @@ pub fn basic_options() -> Options {
unstable_features: UnstableFeatures::Disallow,
debug_assertions: true,
actually_rustdoc: false,
codegen_units: 1,
cli_forced_codegen_units: None,
}
}
......@@ -1711,48 +1711,6 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
let incremental = debugging_opts.incremental.as_ref().map(|m| PathBuf::from(m));
let codegen_units = codegen_units.unwrap_or_else(|| {
match opt_level {
// If we're compiling at `-O0` then default to 16 codegen units.
// The number here shouldn't matter too too much as debug mode
// builds don't rely on performance at all, meaning that lost
// opportunities for inlining through multiple codegen units is
// a non-issue.
//
// Note that the high number here doesn't mean that we'll be
// spawning a large number of threads in parallel. The backend
// of rustc contains global rate limiting through the
// `jobserver` crate so we'll never overload the system with too
// much work, but rather we'll only be optimizing when we're
// otherwise cooperating with other instances of rustc.
//
// Rather the high number here means that we should be able to
// keep a lot of idle cpus busy. By ensuring that no codegen
// unit takes *too* long to build we'll be guaranteed that all
// cpus will finish pretty closely to one another and we should
// make relatively optimal use of system resources
//
// Another note worth mentioning here, however, is that this number
// isn't *too* high. When codegen units are increased that means we
// currently have to codegen `#[inline]` functions into each codegen
// unit, which means the more codegen units we're using the more we
// may be generating. In other words, increasing codegen units may
// increase the overall work the compiler does. If we don't have
// enough cores to make up for this loss then increasing the number
// of codegen units could become an overall loss!
//
// As a result we choose a hopefully conservative value 16, which
// should be more than the number of cpus of most hardware compiling
// Rust but also not too much for 2-4 core machines to have too much
// loss of compile time.
OptLevel::No => 16,
// All other optimization levels default use one codegen unit,
// the historical default in Rust for a Long Time.
_ => 1,
}
});
(Options {
crate_types,
optimize: opt_level,
......@@ -1777,7 +1735,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
unstable_features: UnstableFeatures::from_environment(),
debug_assertions,
actually_rustdoc: false,
codegen_units,
cli_forced_codegen_units: codegen_units,
},
cfg)
}
......
......@@ -636,6 +636,43 @@ pub fn consider_optimizing<T: Fn() -> String>(&self, crate_name: &str, msg: T) -
}
ret
}
/// Computes how many codegen units this compilation should be split into.
///
/// The value is chosen by the first rule that applies:
///
/// 1. the count recorded in `opts.cli_forced_codegen_units`, if present;
/// 2. the target's `default_codegen_units`, if the target specifies one;
/// 3. a fallback derived from the optimization level.
pub fn codegen_units(&self) -> usize {
    let target_default = self.target.target.options.default_codegen_units;

    self.opts.cli_forced_codegen_units
        .or_else(|| target_default.map(|units| units as usize))
        .unwrap_or_else(|| match self.opts.optimize {
            // Unoptimized (`-O0`) builds get 16 codegen units. Debug builds
            // do not depend on runtime performance, so inlining
            // opportunities lost by splitting the crate are not a concern
            // and the exact number is not critical.
            //
            // A large count does not translate into that many threads
            // running at once: the backend is globally rate limited via
            // the `jobserver` crate, so the system is never oversubscribed
            // and work proceeds in cooperation with other rustc instances.
            //
            // The point of the large count is to keep otherwise idle cpus
            // occupied. When no single unit takes *too* long to build, all
            // cpus finish at roughly the same time and system resources
            // are used close to optimally.
            config::OptLevel::No => 16,
            // Every other optimization level keeps a single codegen unit,
            // which has long been the historical default in Rust.
            _ => 1,
        })
}
}
pub fn build_session(sopts: config::Options,
......
......@@ -430,6 +430,9 @@ pub struct TargetOptions {
/// The minimum alignment for global symbols.
pub min_global_align: Option<u64>,
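/// Default number of codegen units to use when no explicit count has been
/// forced for the session; it is consulted before the optimization level,
/// so it applies to optimized builds as well as debug builds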
pub default_codegen_units: Option<u64>,
}
impl Default for TargetOptions {
......@@ -492,6 +495,7 @@ fn default() -> TargetOptions {
crt_static_respected: false,
stack_probes: false,
min_global_align: None,
default_codegen_units: None,
}
}
}
......@@ -732,6 +736,7 @@ pub fn from_json(obj: Json) -> TargetResult {
key!(crt_static_respected, bool);
key!(stack_probes, bool);
key!(min_global_align, Option<u64>);
key!(default_codegen_units, Option<u64>);
if let Some(array) = obj.find("abi-blacklist").and_then(Json::as_array) {
for name in array.iter().filter_map(|abi| abi.as_string()) {
......@@ -924,6 +929,7 @@ fn to_json(&self) -> Json {
target_option_val!(crt_static_respected);
target_option_val!(stack_probes);
target_option_val!(min_global_align);
target_option_val!(default_codegen_units);
if default.abi_blacklist != self.options.abi_blacklist {
d.insert("abi-blacklist".to_string(), self.options.abi_blacklist.iter()
......
......@@ -48,6 +48,11 @@ pub fn target() -> TargetResult {
// code because of the extra costs it involves.
relocation_model: "static".to_string(),
// Right now we invoke an external assembler and this isn't
// compatible with multiple codegen units, and plus we probably
// don't want to invoke that many gcc instances.
default_codegen_units: Some(1),
.. Default::default( )
}
})
......
......@@ -467,7 +467,7 @@ fn link_rlib<'a>(sess: &'a Session,
// of when we do and don't keep .#module-name#.bc files around.
let user_wants_numbered_bitcode =
sess.opts.output_types.contains_key(&OutputType::Bitcode) &&
sess.opts.codegen_units > 1;
sess.codegen_units() > 1;
if !sess.opts.cg.save_temps && !user_wants_numbered_bitcode {
remove(sess, &bc_filename);
}
......
......@@ -1037,10 +1037,10 @@ fn produce_final_output_artifacts(sess: &Session,
let needs_crate_object = crate_output.outputs.contains_key(&OutputType::Exe);
let keep_numbered_bitcode = needs_crate_bitcode ||
(user_wants_bitcode && sess.opts.codegen_units > 1);
(user_wants_bitcode && sess.codegen_units() > 1);
let keep_numbered_objects = needs_crate_object ||
(user_wants_objects && sess.opts.codegen_units > 1);
(user_wants_objects && sess.codegen_units() > 1);
for module in compiled_modules.modules.iter() {
let module_name = Some(&module.name[..]);
......@@ -2052,7 +2052,7 @@ pub fn join(self, sess: &Session, dep_graph: &DepGraph) -> CrateTranslation {
// FIXME: time_llvm_passes support - does this use a global context or
// something?
if sess.opts.codegen_units == 1 && sess.time_llvm_passes() {
if sess.codegen_units() == 1 && sess.time_llvm_passes() {
unsafe { llvm::LLVMRustPrintPassTimings(); }
}
......
......@@ -1218,7 +1218,7 @@ fn collect_and_partition_translation_items<'a, 'tcx>(
let strategy = if tcx.sess.opts.debugging_opts.incremental.is_some() {
PartitioningStrategy::PerModule
} else {
PartitioningStrategy::FixedUnitCount(tcx.sess.opts.codegen_units)
PartitioningStrategy::FixedUnitCount(tcx.sess.codegen_units())
};
let codegen_units = time(time_passes, "codegen unit partitioning", || {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册