From 94f3037f4bc2f18ac1c427e145928cc766b573fe Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Mon, 8 Jan 2018 12:30:52 +0100 Subject: [PATCH] Shorten names of some compiler generated artifacts. --- src/librustc/mir/mono.rs | 12 +++++++ src/librustc/session/config.rs | 2 ++ src/librustc_data_structures/base_n.rs | 22 ++++++++----- src/librustc_incremental/persist/fs.rs | 11 ++++--- .../persist/work_product.rs | 4 +-- src/librustc_mir/monomorphize/partitioning.rs | 33 ++++++++++++++----- src/librustc_trans/assert_module_sources.rs | 6 +++- src/librustc_trans/back/bytecode.rs | 2 +- src/librustc_trans/context.rs | 2 +- src/tools/compiletest/src/runtest.rs | 4 +++ 10 files changed, 70 insertions(+), 28 deletions(-) diff --git a/src/librustc/mir/mono.rs b/src/librustc/mir/mono.rs index 5f74f088237..efdf4066815 100644 --- a/src/librustc/mir/mono.rs +++ b/src/librustc/mir/mono.rs @@ -12,9 +12,11 @@ use syntax::symbol::InternedString; use ty::Instance; use util::nodemap::FxHashMap; +use rustc_data_structures::base_n; use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult, StableHasher}; use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode}; +use std::hash::Hash; #[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)] pub enum MonoItem<'tcx> { @@ -119,6 +121,16 @@ pub fn items_mut(&mut self) { &mut self.items } + + pub fn mangle_name(human_readable_name: &str) -> String { + // We generate a 80 bit hash from the name. This should be enough to + // avoid collisions and is still reasonably short for filenames. + let mut hasher = StableHasher::new(); + human_readable_name.hash(&mut hasher); + let hash: u128 = hasher.finish(); + let hash = hash & ((1u128 << 80) - 1); + base_n::encode(hash, base_n::CASE_INSENSITIVE) + } } impl<'tcx> HashStable> for CodegenUnit<'tcx> { diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs index 05b1d584e9c..9b578f49423 100644 --- a/src/librustc/session/config.rs +++ b/src/librustc/session/config.rs @@ -1236,6 +1236,8 @@ fn parse_optimization_fuel(slot: &mut Option<(String, u64)>, v: Option<&str>) -> "rewrite operators on i128 and u128 into lang item calls (typically provided \ by compiler-builtins) so translation doesn't need to support them, overriding the default for the current target"), + human_readable_cgu_names: bool = (false, parse_bool, [TRACKED], + "generate human-readable, predictable names for codegen units"), } pub fn default_lib_output() -> CrateType { diff --git a/src/librustc_data_structures/base_n.rs b/src/librustc_data_structures/base_n.rs index cf54229fa7f..d333b6393b9 100644 --- a/src/librustc_data_structures/base_n.rs +++ b/src/librustc_data_structures/base_n.rs @@ -13,18 +13,21 @@ use std::str; -pub const MAX_BASE: u64 = 64; -pub const ALPHANUMERIC_ONLY: u64 = 62; +pub const MAX_BASE: usize = 64; +pub const ALPHANUMERIC_ONLY: usize = 62; +pub const CASE_INSENSITIVE: usize = 36; const BASE_64: &'static [u8; MAX_BASE as usize] = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; #[inline] -pub fn push_str(mut n: u64, base: u64, output: &mut String) { +pub fn push_str(mut n: u128, base: usize, output: &mut String) { debug_assert!(base >= 2 && base <= MAX_BASE); - let mut s = [0u8; 64]; + let mut s = [0u8; 128]; let mut index = 0; + let base = base as u128; + loop { s[index] = BASE_64[(n % base) as usize]; index += 1; @@ -39,16 +42,16 @@ pub fn push_str(mut n: u64, base: u64, output: &mut String) { } #[inline] -pub fn encode(n: u64, base: u64) -> String { - let mut s = String::with_capacity(13); +pub fn encode(n: u128, base: usize) -> String { + let mut s = String::new(); push_str(n, base, &mut s); s } #[test] fn test_encode() { - fn test(n: u64, base: u64) { - assert_eq!(Ok(n), u64::from_str_radix(&encode(n, base), base as u32)); + fn test(n: u128, base: usize) { + assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32)); } for base in 2..37 { @@ -57,7 +60,8 @@ fn test(n: u64, base: u64) { test(35, base); test(36, base); test(37, base); - test(u64::max_value(), base); + test(u64::max_value() as u128, base); + test(u128::max_value(), base); for i in 0 .. 1_000 { test(i * 983, base); diff --git a/src/librustc_incremental/persist/fs.rs b/src/librustc_incremental/persist/fs.rs index 42b1fcccace..f4171f951f4 100644 --- a/src/librustc_incremental/persist/fs.rs +++ b/src/librustc_incremental/persist/fs.rs @@ -137,7 +137,7 @@ // or hexadecimal numbers (we want short file and directory names). Since these // numbers will be used in file names, we choose an encoding that is not // case-sensitive (as opposed to base64, for example). -const INT_ENCODE_BASE: u64 = 36; +const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE; pub fn dep_graph_path(sess: &Session) -> PathBuf { in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME) @@ -357,7 +357,7 @@ pub fn finalize_session_directory(sess: &Session, svh: Svh) { let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]); // Append the svh - base_n::push_str(svh.as_u64(), INT_ENCODE_BASE, &mut new_sub_dir_name); + base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name); // Create the full path let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name); @@ -465,7 +465,7 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf { let directory_name = format!("s-{}-{}-working", timestamp, - base_n::encode(random_number as u64, + base_n::encode(random_number as u128, INT_ENCODE_BASE)); debug!("generate_session_dir_path: directory_name = {}", directory_name); let directory_path = crate_dir.join(directory_name); @@ -599,7 +599,7 @@ fn timestamp_to_string(timestamp: SystemTime) -> String { let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000; - base_n::encode(micros, INT_ENCODE_BASE) + base_n::encode(micros as u128, INT_ENCODE_BASE) } fn string_to_timestamp(s: &str) -> Result { @@ -626,7 +626,8 @@ fn crate_path(sess: &Session, // The full crate disambiguator is really long. 64 bits of it should be // sufficient. let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash(); - let crate_disambiguator = base_n::encode(crate_disambiguator, INT_ENCODE_BASE); + let crate_disambiguator = base_n::encode(crate_disambiguator as u128, + INT_ENCODE_BASE); let crate_name = format!("{}-{}", crate_name, crate_disambiguator); incr_dir.join(crate_name) diff --git a/src/librustc_incremental/persist/work_product.rs b/src/librustc_incremental/persist/work_product.rs index f23b8dc85b8..879132bcacf 100644 --- a/src/librustc_incremental/persist/work_product.rs +++ b/src/librustc_incremental/persist/work_product.rs @@ -35,9 +35,9 @@ pub fn save_trans_partition(sess: &Session, let extension = match kind { WorkProductFileKind::Object => "o", WorkProductFileKind::Bytecode => "bc", - WorkProductFileKind::BytecodeCompressed => "bc-compressed", + WorkProductFileKind::BytecodeCompressed => "bc.z", }; - let file_name = format!("cgu-{}.{}", cgu_name, extension); + let file_name = format!("{}.{}", cgu_name, extension); let path_in_incr_dir = in_incr_comp_dir_sess(sess, &file_name); match link_or_copy(path, &path_in_incr_dir) { Ok(_) => Some((kind, file_name)), diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs index 11c68a11669..e899cc072e0 100644 --- a/src/librustc_mir/monomorphize/partitioning.rs +++ b/src/librustc_mir/monomorphize/partitioning.rs @@ -200,7 +200,16 @@ fn as_codegen_unit(&self) -> &CodegenUnit<'tcx> { } // Anything we can't find a proper codegen unit for goes into this. -const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit"; +fn fallback_cgu_name(tcx: TyCtxt) -> InternedString { + const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit"; + + if tcx.sess.opts.debugging_opts.human_readable_cgu_names { + Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str() + } else { + Symbol::intern(&CodegenUnit::mangle_name(FALLBACK_CODEGEN_UNIT)).as_str() + } +} + pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>, trans_items: I, @@ -297,7 +306,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let codegen_unit_name = match characteristic_def_id { Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile), - None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(), + None => fallback_cgu_name(tcx), }; let make_codegen_unit = || { @@ -381,7 +390,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>, // always ensure we have at least one CGU; otherwise, if we have a // crate with just types (for example), we could wind up with no CGU if codegen_units.is_empty() { - let codegen_unit_name = Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(); + let codegen_unit_name = fallback_cgu_name(tcx); codegen_units.insert(codegen_unit_name.clone(), CodegenUnit::new(codegen_unit_name.clone())); } @@ -630,10 +639,10 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, // Unfortunately we cannot just use the `ty::item_path` infrastructure here // because we need paths to modules and the DefIds of those are not // available anymore for external items. - let mut mod_path = String::with_capacity(64); + let mut cgu_name = String::with_capacity(64); let def_path = tcx.def_path(def_id); - mod_path.push_str(&tcx.crate_name(def_path.krate).as_str()); + cgu_name.push_str(&tcx.crate_name(def_path.krate).as_str()); for part in tcx.def_path(def_id) .data @@ -644,15 +653,21 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, _ => false, } }) { - mod_path.push_str("-"); - mod_path.push_str(&part.data.as_interned_str()); + cgu_name.push_str("-"); + cgu_name.push_str(&part.data.as_interned_str()); } if volatile { - mod_path.push_str(".volatile"); + cgu_name.push_str(".volatile"); } - return Symbol::intern(&mod_path[..]).as_str(); + let cgu_name = if tcx.sess.opts.debugging_opts.human_readable_cgu_names { + cgu_name + } else { + CodegenUnit::mangle_name(&cgu_name) + }; + + Symbol::intern(&cgu_name[..]).as_str() } fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString { diff --git a/src/librustc_trans/assert_module_sources.rs b/src/librustc_trans/assert_module_sources.rs index c891bd8aaf4..0e8af1b9511 100644 --- a/src/librustc_trans/assert_module_sources.rs +++ b/src/librustc_trans/assert_module_sources.rs @@ -28,8 +28,10 @@ //! perturb the reuse results. use rustc::dep_graph::{DepNode, DepConstructor}; +use rustc::mir::mono::CodegenUnit; use rustc::ty::TyCtxt; use syntax::ast; +use syntax_pos::symbol::Symbol; use rustc::ich::{ATTR_PARTITION_REUSED, ATTR_PARTITION_TRANSLATED}; const MODULE: &'static str = "module"; @@ -71,9 +73,11 @@ fn check_attr(&self, attr: &ast::Attribute) { } let mname = self.field(attr, MODULE); + let mangled_cgu_name = CodegenUnit::mangle_name(&mname.as_str()); + let mangled_cgu_name = Symbol::intern(&mangled_cgu_name).as_str(); let dep_node = DepNode::new(self.tcx, - DepConstructor::CompileCodegenUnit(mname.as_str())); + DepConstructor::CompileCodegenUnit(mangled_cgu_name)); if let Some(loaded_from_cache) = self.tcx.dep_graph.was_loaded_from_cache(&dep_node) { match (disposition, loaded_from_cache) { diff --git a/src/librustc_trans/back/bytecode.rs b/src/librustc_trans/back/bytecode.rs index 9e4630c08f9..212d1aaf055 100644 --- a/src/librustc_trans/back/bytecode.rs +++ b/src/librustc_trans/back/bytecode.rs @@ -47,7 +47,7 @@ // The version number this compiler will write to bytecode objects in rlibs pub const RLIB_BYTECODE_OBJECT_VERSION: u8 = 2; -pub const RLIB_BYTECODE_EXTENSION: &str = "bytecode.encoded"; +pub const RLIB_BYTECODE_EXTENSION: &str = "bc.z"; pub fn encode(identifier: &str, bytecode: &[u8]) -> Vec { let mut encoded = Vec::new(); diff --git a/src/librustc_trans/context.rs b/src/librustc_trans/context.rs index 248b37c43b4..3014963a97f 100644 --- a/src/librustc_trans/context.rs +++ b/src/librustc_trans/context.rs @@ -572,7 +572,7 @@ pub fn generate_local_symbol_name(&self, prefix: &str) -> String { let mut name = String::with_capacity(prefix.len() + 6); name.push_str(prefix); name.push_str("."); - base_n::push_str(idx as u64, base_n::ALPHANUMERIC_ONLY, &mut name); + base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name); name } diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index dbeee39e606..774733e7068 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -1520,6 +1520,10 @@ fn make_compile_args(&self, input_file: &Path, output_file: TargetLocation) -> C rustc.args(&["-Z", "incremental-queries"]); } + if self.config.mode == CodegenUnits { + rustc.args(&["-Z", "human_readable_cgu_names"]); + } + match self.config.mode { CompileFail | ParseFail | Incremental => { // If we are extracting and matching errors in the new -- GitLab