提交 2953edc7 编写于 作者: B bors

Auto merge of #98475 - notriddle:notriddle/index-fn-signatures, r=GuillaumeGomez

rustdoc: reference function signature types from the `p` array

This reduces the size of the function signature index, because it's common to have many functions that operate on the same types.

    $ wc -c search-index-old.js search-index-new.js
    5224374 search-index-old.js
    3932314 search-index-new.js

By my math, this reduces the uncompressed size of the search index by 32%.
On compressed signatures, the wins are less drastic, a mere 8%:

    $ wc -c search-index-old.js.gz search-index-new.js.gz
    404532 search-index-old.js.gz
    371635 search-index-new.js.gz
......@@ -1671,10 +1671,6 @@ pub(crate) fn is_impl_trait(&self) -> bool {
matches!(self, Type::ImplTrait(_))
}
pub(crate) fn is_primitive(&self) -> bool {
self.primitive_type().is_some()
}
pub(crate) fn projection(&self) -> Option<(&Type, DefId, PathSegment)> {
if let QPath { self_type, trait_, assoc, .. } = self {
Some((self_type, trait_.def_id(), *assoc.clone()))
......
......@@ -48,7 +48,6 @@ pub(crate) enum ItemType {
ProcAttribute = 23,
ProcDerive = 24,
TraitAlias = 25,
Generic = 26,
}
impl Serialize for ItemType {
......@@ -175,7 +174,6 @@ pub(crate) fn as_str(&self) -> &'static str {
ItemType::ProcAttribute => "attr",
ItemType::ProcDerive => "derive",
ItemType::TraitAlias => "traitalias",
ItemType::Generic => "generic",
}
}
}
......
......@@ -110,63 +110,72 @@ pub(crate) struct IndexItem {
/// A type used for the search index.
#[derive(Debug)]
pub(crate) struct RenderType {
name: Option<String>,
generics: Option<Vec<TypeWithKind>>,
id: Option<RenderTypeId>,
generics: Option<Vec<RenderType>>,
}
/// Full type of functions/methods in the search index.
#[derive(Debug)]
pub(crate) struct IndexItemFunctionType {
inputs: Vec<TypeWithKind>,
output: Vec<TypeWithKind>,
}
impl Serialize for IndexItemFunctionType {
impl Serialize for RenderType {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
// If we couldn't figure out a type, just write `null`.
let has_missing = self.inputs.iter().chain(self.output.iter()).any(|i| i.ty.name.is_none());
if has_missing {
serializer.serialize_none()
} else {
let id = match &self.id {
// 0 is a sentinel, everything else is one-indexed
None => 0,
Some(RenderTypeId::Index(idx)) => idx + 1,
_ => panic!("must convert render types to indexes before serializing"),
};
if let Some(generics) = &self.generics {
let mut seq = serializer.serialize_seq(None)?;
seq.serialize_element(&self.inputs)?;
match self.output.as_slice() {
[] => {}
[one] => seq.serialize_element(one)?,
all => seq.serialize_element(all)?,
}
seq.serialize_element(&id)?;
seq.serialize_element(generics)?;
seq.end()
} else {
id.serialize(serializer)
}
}
}
#[derive(Debug)]
pub(crate) struct TypeWithKind {
ty: RenderType,
kind: ItemType,
#[derive(Clone, Debug)]
pub(crate) enum RenderTypeId {
DefId(DefId),
Primitive(clean::PrimitiveType),
Index(usize),
}
impl From<(RenderType, ItemType)> for TypeWithKind {
fn from(x: (RenderType, ItemType)) -> TypeWithKind {
TypeWithKind { ty: x.0, kind: x.1 }
}
/// Full type of functions/methods in the search index.
#[derive(Debug)]
pub(crate) struct IndexItemFunctionType {
inputs: Vec<RenderType>,
output: Vec<RenderType>,
}
impl Serialize for TypeWithKind {
impl Serialize for IndexItemFunctionType {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut seq = serializer.serialize_seq(None)?;
seq.serialize_element(&self.ty.name)?;
seq.serialize_element(&self.kind)?;
if let Some(generics) = &self.ty.generics {
seq.serialize_element(generics)?;
// If we couldn't figure out a type, just write `0`.
let has_missing = self
.inputs
.iter()
.chain(self.output.iter())
.any(|i| i.id.is_none() && i.generics.is_none());
if has_missing {
0.serialize(serializer)
} else {
let mut seq = serializer.serialize_seq(None)?;
match &self.inputs[..] {
[one] if one.generics.is_none() => seq.serialize_element(one)?,
_ => seq.serialize_element(&self.inputs)?,
}
match &self.output[..] {
[] => {}
[one] if one.generics.is_none() => seq.serialize_element(one)?,
_ => seq.serialize_element(&self.output)?,
}
seq.end()
}
seq.end()
}
}
......@@ -2517,7 +2526,6 @@ fn item_ty_to_section(ty: ItemType) -> ItemSection {
ItemType::ProcAttribute => ItemSection::AttributeMacros,
ItemType::ProcDerive => ItemSection::DeriveMacros,
ItemType::TraitAlias => ItemSection::TraitAliases,
ItemType::Generic => unreachable!(),
}
}
......
......@@ -3,16 +3,19 @@
use rustc_data_structures::fx::FxHashMap;
use rustc_middle::ty::TyCtxt;
use rustc_span::symbol::{kw, Symbol};
use rustc_span::def_id::LOCAL_CRATE;
use rustc_span::symbol::Symbol;
use serde::ser::{Serialize, SerializeStruct, Serializer};
use crate::clean;
use crate::clean::types::{FnRetTy, Function, GenericBound, Generics, Type, WherePredicate};
use crate::clean::types::{
FnRetTy, Function, GenericBound, Generics, ItemId, Type, WherePredicate,
};
use crate::formats::cache::{Cache, OrphanImplItem};
use crate::formats::item_type::ItemType;
use crate::html::format::join_with_double_colon;
use crate::html::markdown::short_markdown_summary;
use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, TypeWithKind};
use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, RenderTypeId};
/// Builds the search index from the collected metadata
pub(crate) fn build_index<'tcx>(
......@@ -20,7 +23,7 @@ pub(crate) fn build_index<'tcx>(
cache: &mut Cache,
tcx: TyCtxt<'tcx>,
) -> String {
let mut defid_to_pathid = FxHashMap::default();
let mut itemid_to_pathid = FxHashMap::default();
let mut crate_paths = vec![];
// Attach all orphan items to the type's definition if the type
......@@ -48,14 +51,12 @@ pub(crate) fn build_index<'tcx>(
.doc_value()
.map_or_else(String::new, |s| short_markdown_summary(&s, &krate.module.link_names(cache)));
let Cache { ref mut search_index, ref paths, .. } = *cache;
// Aliases added through `#[doc(alias = "...")]`. Since a few items can have the same alias,
// we need the alias element to have an array of items.
let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new();
// Sort search index items. This improves the compressibility of the search index.
search_index.sort_unstable_by(|k1, k2| {
cache.search_index.sort_unstable_by(|k1, k2| {
// `sort_unstable_by_key` produces lifetime errors
let k1 = (&k1.path, &k1.name, &k1.ty, &k1.parent);
let k2 = (&k2.path, &k2.name, &k2.ty, &k2.parent);
......@@ -63,7 +64,7 @@ pub(crate) fn build_index<'tcx>(
});
// Set up alias indexes.
for (i, item) in search_index.iter().enumerate() {
for (i, item) in cache.search_index.iter().enumerate() {
for alias in &item.aliases[..] {
aliases.entry(alias.as_str().to_lowercase()).or_default().push(i);
}
......@@ -74,24 +75,99 @@ pub(crate) fn build_index<'tcx>(
let mut lastpath = "";
let mut lastpathid = 0usize;
// First, on function signatures
let mut search_index = std::mem::replace(&mut cache.search_index, Vec::new());
for item in search_index.iter_mut() {
fn convert_render_type(
ty: &mut RenderType,
cache: &mut Cache,
itemid_to_pathid: &mut FxHashMap<ItemId, usize>,
lastpathid: &mut usize,
crate_paths: &mut Vec<(ItemType, Symbol)>,
) {
if let Some(generics) = &mut ty.generics {
for item in generics {
convert_render_type(item, cache, itemid_to_pathid, lastpathid, crate_paths);
}
}
let Cache { ref paths, ref external_paths, .. } = *cache;
let Some(id) = ty.id.clone() else {
assert!(ty.generics.is_some());
return;
};
let (itemid, path, item_type) = match id {
RenderTypeId::DefId(defid) => {
if let Some(&(ref fqp, item_type)) =
paths.get(&defid).or_else(|| external_paths.get(&defid))
{
(ItemId::DefId(defid), *fqp.last().unwrap(), item_type)
} else {
ty.id = None;
return;
}
}
RenderTypeId::Primitive(primitive) => (
ItemId::Primitive(primitive, LOCAL_CRATE),
primitive.as_sym(),
ItemType::Primitive,
),
RenderTypeId::Index(_) => return,
};
match itemid_to_pathid.entry(itemid) {
Entry::Occupied(entry) => ty.id = Some(RenderTypeId::Index(*entry.get())),
Entry::Vacant(entry) => {
let pathid = *lastpathid;
entry.insert(pathid);
*lastpathid += 1;
crate_paths.push((item_type, path));
ty.id = Some(RenderTypeId::Index(pathid));
}
}
}
if let Some(search_type) = &mut item.search_type {
for item in &mut search_type.inputs {
convert_render_type(
item,
cache,
&mut itemid_to_pathid,
&mut lastpathid,
&mut crate_paths,
);
}
for item in &mut search_type.output {
convert_render_type(
item,
cache,
&mut itemid_to_pathid,
&mut lastpathid,
&mut crate_paths,
);
}
}
}
let Cache { ref paths, .. } = *cache;
// Then, on parent modules
let crate_items: Vec<&IndexItem> = search_index
.iter_mut()
.map(|item| {
item.parent_idx = item.parent.and_then(|defid| match defid_to_pathid.entry(defid) {
Entry::Occupied(entry) => Some(*entry.get()),
Entry::Vacant(entry) => {
let pathid = lastpathid;
entry.insert(pathid);
lastpathid += 1;
item.parent_idx =
item.parent.and_then(|defid| match itemid_to_pathid.entry(ItemId::DefId(defid)) {
Entry::Occupied(entry) => Some(*entry.get()),
Entry::Vacant(entry) => {
let pathid = lastpathid;
entry.insert(pathid);
lastpathid += 1;
if let Some(&(ref fqp, short)) = paths.get(&defid) {
crate_paths.push((short, *fqp.last().unwrap()));
Some(pathid)
} else {
None
if let Some(&(ref fqp, short)) = paths.get(&defid) {
crate_paths.push((short, *fqp.last().unwrap()));
Some(pathid)
} else {
None
}
}
}
});
});
// Omit the parent path if it is same to that of the prior item.
if lastpath == &item.path {
......@@ -151,13 +227,40 @@ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
"`{}` is missing idx",
item.name
);
// 0 is a sentinel, everything else is one-indexed
item.parent_idx.map(|x| x + 1).unwrap_or(0)
})
.collect::<Vec<_>>(),
)?;
crate_data.serialize_field(
"f",
&self.items.iter().map(|item| &item.search_type).collect::<Vec<_>>(),
&self
.items
.iter()
.map(|item| {
// Fake option to get `0` out as a sentinel instead of `null`.
// We want to use `0` because it's three less bytes.
enum FunctionOption<'a> {
Function(&'a IndexItemFunctionType),
None,
}
impl<'a> Serialize for FunctionOption<'a> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match self {
FunctionOption::None => 0.serialize(serializer),
FunctionOption::Function(ty) => ty.serialize(serializer),
}
}
}
match &item.search_type {
Some(ty) => FunctionOption::Function(ty),
None => FunctionOption::None,
}
})
.collect::<Vec<_>>(),
)?;
crate_data.serialize_field(
"p",
......@@ -202,36 +305,33 @@ pub(crate) fn get_function_type_for_search<'tcx>(
_ => return None,
};
inputs.retain(|a| a.ty.name.is_some());
output.retain(|a| a.ty.name.is_some());
inputs.retain(|a| a.id.is_some() || a.generics.is_some());
output.retain(|a| a.id.is_some() || a.generics.is_some());
Some(IndexItemFunctionType { inputs, output })
}
fn get_index_type(clean_type: &clean::Type, generics: Vec<TypeWithKind>) -> RenderType {
fn get_index_type(clean_type: &clean::Type, generics: Vec<RenderType>) -> RenderType {
RenderType {
name: get_index_type_name(clean_type).map(|s| s.as_str().to_ascii_lowercase()),
id: get_index_type_id(clean_type),
generics: if generics.is_empty() { None } else { Some(generics) },
}
}
fn get_index_type_name(clean_type: &clean::Type) -> Option<Symbol> {
fn get_index_type_id(clean_type: &clean::Type) -> Option<RenderTypeId> {
match *clean_type {
clean::Type::Path { ref path, .. } => {
let path_segment = path.segments.last().unwrap();
Some(path_segment.name)
}
clean::Type::Path { ref path, .. } => Some(RenderTypeId::DefId(path.def_id())),
clean::DynTrait(ref bounds, _) => {
let path = &bounds[0].trait_;
Some(path.segments.last().unwrap().name)
Some(RenderTypeId::DefId(path.def_id()))
}
// We return an empty name because we don't care about the generic name itself.
clean::Generic(_) | clean::ImplTrait(_) => Some(kw::Empty),
clean::Primitive(ref p) => Some(p.as_sym()),
clean::Primitive(p) => Some(RenderTypeId::Primitive(p)),
clean::BorrowedRef { ref type_, .. } | clean::RawPointer(_, ref type_) => {
get_index_type_name(type_)
get_index_type_id(type_)
}
clean::BareFunction(_)
| clean::Generic(_)
| clean::ImplTrait(_)
| clean::Tuple(_)
| clean::Slice(_)
| clean::Array(_, _)
......@@ -254,16 +354,10 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>(
arg: &'a Type,
tcx: TyCtxt<'tcx>,
recurse: usize,
res: &mut Vec<TypeWithKind>,
res: &mut Vec<RenderType>,
cache: &Cache,
) {
fn insert_ty(
res: &mut Vec<TypeWithKind>,
tcx: TyCtxt<'_>,
ty: Type,
mut generics: Vec<TypeWithKind>,
cache: &Cache,
) {
fn insert_ty(res: &mut Vec<RenderType>, ty: Type, mut generics: Vec<RenderType>) {
// generics and impl trait are both identified by their generics,
// rather than a type name itself
let anonymous = ty.is_full_generic() || ty.is_impl_trait();
......@@ -316,20 +410,11 @@ fn insert_ty(
return;
}
}
let mut index_ty = get_index_type(&ty, generics);
if index_ty.name.as_ref().map(|s| s.is_empty() && generics_empty).unwrap_or(true) {
let index_ty = get_index_type(&ty, generics);
if index_ty.id.is_none() && generics_empty {
return;
}
if anonymous {
// We remove the name of the full generic because we have no use for it.
index_ty.name = Some(String::new());
res.push(TypeWithKind::from((index_ty, ItemType::Generic)));
} else if let Some(kind) = ty.def_id(cache).map(|did| tcx.def_kind(did).into()) {
res.push(TypeWithKind::from((index_ty, kind)));
} else if ty.is_primitive() {
// This is a primitive, let's store it as such.
res.push(TypeWithKind::from((index_ty, ItemType::Primitive)));
}
res.push(index_ty);
}
if recurse >= 10 {
......@@ -379,7 +464,7 @@ fn insert_ty(
}
}
}
insert_ty(res, tcx, arg.clone(), ty_generics, cache);
insert_ty(res, arg.clone(), ty_generics);
}
// Otherwise we check if the trait bounds are "inlined" like `T: Option<u32>`...
if let Some(bound) = generics.params.iter().find(|g| g.is_type() && g.name == arg_s) {
......@@ -398,7 +483,7 @@ fn insert_ty(
);
}
}
insert_ty(res, tcx, arg.clone(), ty_generics, cache);
insert_ty(res, arg.clone(), ty_generics);
}
} else if let Type::ImplTrait(ref bounds) = *arg {
let mut ty_generics = Vec::new();
......@@ -416,7 +501,7 @@ fn insert_ty(
);
}
}
insert_ty(res, tcx, arg.clone(), ty_generics, cache);
insert_ty(res, arg.clone(), ty_generics);
} else {
// This is not a type parameter. So for example if we have `T, U: Option<T>`, and we're
// looking at `Option`, we enter this "else" condition, otherwise if it's `T`, we don't.
......@@ -437,7 +522,7 @@ fn insert_ty(
);
}
}
insert_ty(res, tcx, arg.clone(), ty_generics, cache);
insert_ty(res, arg.clone(), ty_generics);
}
}
......@@ -450,7 +535,7 @@ fn get_fn_inputs_and_outputs<'tcx>(
tcx: TyCtxt<'tcx>,
impl_generics: Option<&(clean::Type, clean::Generics)>,
cache: &Cache,
) -> (Vec<TypeWithKind>, Vec<TypeWithKind>) {
) -> (Vec<RenderType>, Vec<RenderType>) {
let decl = &func.decl;
let combined_generics;
......@@ -478,9 +563,7 @@ fn get_fn_inputs_and_outputs<'tcx>(
if !args.is_empty() {
all_types.extend(args);
} else {
if let Some(kind) = arg.type_.def_id(cache).map(|did| tcx.def_kind(did).into()) {
all_types.push(TypeWithKind::from((get_index_type(&arg.type_, vec![]), kind)));
}
all_types.push(get_index_type(&arg.type_, vec![]));
}
}
......@@ -497,9 +580,7 @@ fn get_fn_inputs_and_outputs<'tcx>(
cache,
);
if ret_types.is_empty() {
if let Some(kind) = return_type.def_id(cache).map(|did| tcx.def_kind(did).into()) {
ret_types.push(TypeWithKind::from((get_index_type(return_type, vec![]), kind)));
}
ret_types.push(get_index_type(return_type, vec![]));
}
}
_ => {}
......
......@@ -81,3 +81,62 @@ let ResultsTable;
* }}
*/
let Results;
/**
* A pair of [inputs, outputs], or 0 for null. This is stored in the search index.
* The JavaScript deserializes this into FunctionSearchType.
*
* Numeric IDs are *ONE-indexed* into the paths array (`p`). Zero is used as a sentinel for `null`
* because `null` is four bytes while `0` is one byte.
*
* An input or output can be encoded as just a number if there is only one of them, AND
* it has no generics. The no generics rule exists to avoid ambiguity: imagine if you had
* a function with a single output, and that output had a single generic:
*
* fn something() -> Result<usize, usize>
*
* If output was allowed to be any RawFunctionType, it would look like this
*
* [[], [50, [3, 3]]]
*
* The problem is that the above output could be interpreted as either a type with ID 50 and two
* generics, or it could be interpreted as a pair of types, the first one with ID 50 and the second
* with ID 3 and a single generic parameter that is also ID 3. We avoid this ambiguity by choosing
* in favor of the pair of types interpretation. This is why the `(number|Array<RawFunctionType>)`
* is used instead of `(RawFunctionType|Array<RawFunctionType>)`.
*
* @typedef {(
* 0 |
* [(number|Array<RawFunctionType>)] |
* [(number|Array<RawFunctionType>), (number|Array<RawFunctionType>)]
* )}
*/
let RawFunctionSearchType;
/**
* A single function input or output type. This is either a single path ID, or a pair of
* [path ID, generics].
*
* Numeric IDs are *ONE-indexed* into the paths array (`p`). Zero is used as a sentinel for `null`
* because `null` is four bytes while `0` is one byte.
*
* @typedef {number | [number, Array<RawFunctionType>]}
*/
let RawFunctionType;
/**
* @typedef {{
* inputs: Array<FunctionType>,
* outputs: Array<FunctionType>,
* }}
*/
let FunctionSearchType;
/**
* @typedef {{
* name: (null|string),
* ty: (null|number),
* generics: Array<FunctionType>,
* }}
*/
let FunctionType;
......@@ -114,10 +114,6 @@ function levenshtein(s1, s2) {
function initSearch(rawSearchIndex) {
const MAX_LEV_DISTANCE = 3;
const MAX_RESULTS = 200;
const GENERICS_DATA = 2;
const NAME = 0;
const INPUTS_DATA = 0;
const OUTPUT_DATA = 1;
const NO_TYPE_FILTER = -1;
/**
* @type {Array<Row>}
......@@ -895,21 +891,18 @@ function initSearch(rawSearchIndex) {
* @return {integer} - Returns the best match (if any) or `MAX_LEV_DISTANCE + 1`.
*/
function checkGenerics(row, elem, defaultLev) {
if (row.length <= GENERICS_DATA || row[GENERICS_DATA].length === 0) {
return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1;
} else if (row[GENERICS_DATA].length > 0 && row[GENERICS_DATA][0][NAME] === "") {
if (row.length > GENERICS_DATA) {
return checkGenerics(row[GENERICS_DATA][0], elem, defaultLev);
}
if (row.generics.length === 0) {
return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1;
} else if (row.generics.length > 0 && row.generics[0].name === null) {
return checkGenerics(row.generics[0], elem, defaultLev);
}
// The names match, but we need to be sure that all generics kinda
// match as well.
let elem_name;
if (elem.generics.length > 0 && row[GENERICS_DATA].length >= elem.generics.length) {
if (elem.generics.length > 0 && row.generics.length >= elem.generics.length) {
const elems = Object.create(null);
for (const entry of row[GENERICS_DATA]) {
elem_name = entry[NAME];
for (const entry of row.generics) {
elem_name = entry.name;
if (elem_name === "") {
// Pure generic, needs to check into it.
if (checkGenerics(entry, elem, MAX_LEV_DISTANCE + 1) !== 0) {
......@@ -963,7 +956,7 @@ function initSearch(rawSearchIndex) {
*/
function checkIfInGenerics(row, elem) {
let lev = MAX_LEV_DISTANCE + 1;
for (const entry of row[GENERICS_DATA]) {
for (const entry of row.generics) {
lev = Math.min(checkType(entry, elem, true), lev);
if (lev === 0) {
break;
......@@ -984,23 +977,22 @@ function initSearch(rawSearchIndex) {
* no match, returns `MAX_LEV_DISTANCE + 1`.
*/
function checkType(row, elem, literalSearch) {
if (row[NAME].length === 0) {
if (row.name === null) {
// This is a pure "generic" search, no need to run other checks.
if (row.length > GENERICS_DATA) {
if (row.generics.length > 0) {
return checkIfInGenerics(row, elem);
}
return MAX_LEV_DISTANCE + 1;
}
let lev = levenshtein(row[NAME], elem.name);
let lev = levenshtein(row.name, elem.name);
if (literalSearch) {
if (lev !== 0) {
// The name didn't match, let's try to check if the generics do.
if (elem.generics.length === 0) {
const checkGeneric = (row.length > GENERICS_DATA &&
row[GENERICS_DATA].length > 0);
if (checkGeneric && row[GENERICS_DATA]
.findIndex(tmp_elem => tmp_elem[NAME] === elem.name) !== -1) {
const checkGeneric = row.generics.length > 0;
if (checkGeneric && row.generics
.findIndex(tmp_elem => tmp_elem.name === elem.name) !== -1) {
return 0;
}
}
......@@ -1009,7 +1001,7 @@ function initSearch(rawSearchIndex) {
return checkGenerics(row, elem, MAX_LEV_DISTANCE + 1);
}
return 0;
} else if (row.length > GENERICS_DATA) {
} else if (row.generics.length > 0) {
if (elem.generics.length === 0) {
if (lev === 0) {
return 0;
......@@ -1059,9 +1051,9 @@ function initSearch(rawSearchIndex) {
function findArg(row, elem, typeFilter) {
let lev = MAX_LEV_DISTANCE + 1;
if (row && row.type && row.type[INPUTS_DATA] && row.type[INPUTS_DATA].length > 0) {
for (const input of row.type[INPUTS_DATA]) {
if (!typePassesFilter(typeFilter, input[1])) {
if (row && row.type && row.type.inputs && row.type.inputs.length > 0) {
for (const input of row.type.inputs) {
if (!typePassesFilter(typeFilter, input.ty)) {
continue;
}
lev = Math.min(lev, checkType(input, elem, parsedQuery.literalSearch));
......@@ -1086,13 +1078,10 @@ function initSearch(rawSearchIndex) {
function checkReturned(row, elem, typeFilter) {
let lev = MAX_LEV_DISTANCE + 1;
if (row && row.type && row.type.length > OUTPUT_DATA) {
let ret = row.type[OUTPUT_DATA];
if (typeof ret[0] === "string") {
ret = [ret];
}
if (row && row.type && row.type.output.length > 0) {
const ret = row.type.output;
for (const ret_ty of ret) {
if (!typePassesFilter(typeFilter, ret_ty[1])) {
if (!typePassesFilter(typeFilter, ret_ty.ty)) {
continue;
}
lev = Math.min(lev, checkType(ret_ty, elem, parsedQuery.literalSearch));
......@@ -1836,6 +1825,97 @@ function initSearch(rawSearchIndex) {
filterCrates);
}
/**
* Convert a list of RawFunctionType / ID to object-based FunctionType.
*
* Crates often have lots of functions in them, and it's common to have a large number of
* functions that operate on a small set of data types, so the search index compresses them
* by encoding function parameter and return types as indexes into an array of names.
*
* Even when a general-purpose compression algorithm is used, this is still a win. I checked.
* https://github.com/rust-lang/rust/pull/98475#issue-1284395985
*
* The format for individual function types is encoded in
* librustdoc/html/render/mod.rs: impl Serialize for RenderType
*
* @param {null|Array<RawFunctionType>} types
* @param {Array<{name: string, ty: number}>} lowercasePaths
*
* @return {Array<FunctionSearchType>}
*/
function buildItemSearchTypeAll(types, lowercasePaths) {
const PATH_INDEX_DATA = 0;
const GENERICS_DATA = 1;
return types.map(type => {
let pathIndex, generics;
if (typeof type === "number") {
pathIndex = type;
generics = [];
} else {
pathIndex = type[PATH_INDEX_DATA];
generics = buildItemSearchTypeAll(type[GENERICS_DATA], lowercasePaths);
}
return {
// `0` is used as a sentinel because it's fewer bytes than `null`
name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name,
ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
generics: generics,
};
});
}
/**
* Convert from RawFunctionSearchType to FunctionSearchType.
*
* Crates often have lots of functions in them, and function signatures are sometimes complex,
* so rustdoc uses a pretty tight encoding for them. This function converts it to a simpler,
* object-based encoding so that the actual search code is more readable and easier to debug.
*
* The raw function search type format is generated using serde in
* librustdoc/html/render/mod.rs: impl Serialize for IndexItemFunctionType
*
* @param {RawFunctionSearchType} functionSearchType
* @param {Array<{name: string, ty: number}>} lowercasePaths
*
* @return {null|FunctionSearchType}
*/
function buildFunctionSearchType(functionSearchType, lowercasePaths) {
const INPUTS_DATA = 0;
const OUTPUT_DATA = 1;
// `0` is used as a sentinel because it's fewer bytes than `null`
if (functionSearchType === 0) {
return null;
}
let inputs, output;
if (typeof functionSearchType[INPUTS_DATA] === "number") {
const pathIndex = functionSearchType[INPUTS_DATA];
inputs = [{
name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name,
ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
generics: [],
}];
} else {
inputs = buildItemSearchTypeAll(functionSearchType[INPUTS_DATA], lowercasePaths);
}
if (functionSearchType.length > 1) {
if (typeof functionSearchType[OUTPUT_DATA] === "number") {
const pathIndex = functionSearchType[OUTPUT_DATA];
output = [{
name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name,
ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty,
generics: [],
}];
} else {
output = buildItemSearchTypeAll(functionSearchType[OUTPUT_DATA], lowercasePaths);
}
} else {
output = [];
}
return {
inputs, output,
};
}
function buildIndex(rawSearchIndex) {
searchIndex = [];
/**
......@@ -1862,14 +1942,22 @@ function initSearch(rawSearchIndex) {
* q[i] contains the full path of the item, or an empty string indicating
* "same as q[i-1]".
*
* i[i], f[i] are a mystery.
* i[i] contains an item's parent, usually a module. For compactness,
* it is a set of indexes into the `p` array.
*
* f[i] contains function signatures, or `0` if the item isn't a function.
* Functions are themselves encoded as arrays. The first item is a list of
* types representing the function's inputs, and the second list item is a list
* of types representing the function's output. Tuples are flattened.
* Types are also represented as arrays; the first item is an index into the `p`
* array, while the second is a list of types representing any generic parameters.
*
* `a` defines aliases with an Array of pairs: [name, offset], where `offset`
* points into the n/t/d/q/i/f arrays.
*
* `doc` contains the description of the crate.
*
* `p` is a mystery and isn't the same length as n/t/d/q/i/f.
* `p` is a list of path/type pairs. It is used for parents and function parameters.
*
* @type {{
* doc: string,
......@@ -1879,7 +1967,7 @@ function initSearch(rawSearchIndex) {
* d: Array<string>,
* q: Array<string>,
* i: Array<Number>,
* f: Array<Array<?>>,
* f: Array<RawFunctionSearchType>,
* p: Array<Object>,
* }}
*/
......@@ -1923,9 +2011,14 @@ function initSearch(rawSearchIndex) {
// [Number] index to items]
const aliases = crateCorpus.a;
// an array of [{name: String, ty: Number}]
const lowercasePaths = [];
// convert `rawPaths` entries into object form
// generate normalizedPaths for function search mode
let len = paths.length;
for (i = 0; i < len; ++i) {
lowercasePaths.push({ty: paths[i][0], name: paths[i][1].toLowerCase()});
paths[i] = {ty: paths[i][0], name: paths[i][1]};
}
......@@ -1955,7 +2048,7 @@ function initSearch(rawSearchIndex) {
path: itemPaths[i] ? itemPaths[i] : lastPath,
desc: itemDescs[i],
parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined,
type: itemFunctionSearchTypes[i],
type: buildFunctionSearchType(itemFunctionSearchTypes[i], lowercasePaths),
id: id,
normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""),
};
......
......@@ -761,7 +761,6 @@ fn from_tcx(kind: ItemType, _tcx: TyCtxt<'_>) -> Self {
TraitAlias => ItemKind::TraitAlias,
ProcAttribute => ItemKind::ProcAttribute,
ProcDerive => ItemKind::ProcDerive,
Generic => unreachable!(),
}
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册