use std::cmp::Ordering;
use std::os::unix::fs::MetadataExt;

use getopts;
use glob;
use natord;

use fs::File;
use options::misfire::Misfire;


/// The **file filter** processes a vector of files before outputting them,
/// filtering and sorting the files depending on the user’s command-line
/// flags.
B
Ben S 已提交
15
#[derive(Default, PartialEq, Debug, Clone)]
B
Benjamin Sago 已提交
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
pub struct FileFilter {

    /// Whether directories should be listed first, and other types of file
    /// second. Some users prefer it like this.
    pub list_dirs_first: bool,

    /// The metadata field to sort by.
    pub sort_field: SortField,

    /// Whether to reverse the sorting order. This would sort the largest
    /// files first, or files starting with Z, or the most-recently-changed
    /// ones, depending on the sort field.
    pub reverse: bool,

    /// Whether to include invisible “dot” files when listing a directory.
    ///
    /// Files starting with a single “.” are used to determine “system” or
    /// “configuration” files that should not be displayed in a regular
    /// directory listing.
    ///
    /// This came about more or less by a complete historical accident,
    /// when the original `ls` tried to hide `.` and `..`:
    /// https://plus.google.com/+RobPikeTheHuman/posts/R58WgWwN9jp
    ///
    ///   When one typed ls, however, these files appeared, so either Ken or
    ///   Dennis added a simple test to the program. It was in assembler then,
    ///   but the code in question was equivalent to something like this:
    ///      if (name[0] == '.') continue;
    ///   This statement was a little shorter than what it should have been,
    ///   which is:
    ///      if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue;
    ///   but hey, it was easy.
    ///
    ///   Two things resulted.
    ///
    ///   First, a bad precedent was set. A lot of other lazy programmers
    ///   introduced bugs by making the same simplification. Actual files
    ///   beginning with periods are often skipped when they should be counted.
    ///
    ///   Second, and much worse, the idea of a "hidden" or "dot" file was
    ///   created. As a consequence, more lazy programmers started dropping
    ///   files into everyone's home directory. I don't have all that much
    ///   stuff installed on the machine I'm using to type this, but my home
    ///   directory has about a hundred dot files and I don't even know what
    ///   most of them are or whether they're still needed. Every file name
    ///   evaluation that goes through my home directory is slowed down by
    ///   this accumulated sludge.
63
    pub dot_filter: DotFilter,
B
Ben S 已提交
64 65 66 67

    /// Glob patterns to ignore. Any file name that matches *any* of these
    /// patterns won't be displayed in the list.
    ignore_patterns: IgnorePatterns,
B
Benjamin Sago 已提交
68 69 70 71 72 73 74 75 76 77
}

impl FileFilter {

    /// Determines the set of file filter options to use, based on the user’s
    /// command-line arguments.
    pub fn deduce(matches: &getopts::Matches) -> Result<FileFilter, Misfire> {
        Ok(FileFilter {
            list_dirs_first: matches.opt_present("group-directories-first"),
            reverse:         matches.opt_present("reverse"),
78
            sort_field:      SortField::deduce(matches)?,
79
            dot_filter:      DotFilter::deduce(matches),
80
            ignore_patterns: IgnorePatterns::deduce(matches)?,
B
Benjamin Sago 已提交
81 82 83 84
        })
    }

    /// Remove every file in the given vector that does *not* pass the
B
Ben S 已提交
85 86
    /// filter predicate for files found inside a directory.
    pub fn filter_child_files(&self, files: &mut Vec<File>) {
87 88 89
        match self.dot_filter {
            DotFilter::JustFiles => files.retain(|f| !f.is_dotfile()),
            DotFilter::ShowDotfiles => {/* keep all elements */},
90
            DotFilter::ShowDotfilesAndDots => unimplemented!(),
B
Benjamin Sago 已提交
91
        }
B
Ben S 已提交
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106

        files.retain(|f| !self.ignore_patterns.is_ignored(f));
    }

    /// Remove every file in the given vector that does *not* pass the
    /// filter predicate for file names specified on the command-line.
    ///
    /// The rules are different for these types of files than the other
    /// type because the ignore rules can be used with globbing. For
    /// example, running "exa -I='*.tmp' .vimrc" shouldn't filter out the
    /// dotfile, because it's been directly specified. But running
    /// "exa -I='*.ogg' music/*" should filter out the ogg files obtained
    /// from the glob, even though the globbing is done by the shell!
    pub fn filter_argument_files(&self, files: &mut Vec<File>) {
        files.retain(|f| !self.ignore_patterns.is_ignored(f));
B
Benjamin Sago 已提交
107 108 109
    }

    /// Sort the files in the given vector based on the sort field option.
B
Ben S 已提交
110 111
    pub fn sort_files<'a, F>(&self, files: &mut Vec<F>)
    where F: AsRef<File<'a>> {
B
Benjamin Sago 已提交
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229

        files.sort_by(|a, b| self.compare_files(a.as_ref(), b.as_ref()));

        if self.reverse {
            files.reverse();
        }

        if self.list_dirs_first {
            // This relies on the fact that `sort_by` is stable.
            files.sort_by(|a, b| b.as_ref().is_directory().cmp(&a.as_ref().is_directory()));
        }
    }

    /// Compares two files to determine the order they should be listed in,
    /// depending on the search field.
    pub fn compare_files(&self, a: &File, b: &File) -> Ordering {
        use self::SortCase::{Sensitive, Insensitive};

        match self.sort_field {
            SortField::Unsorted  => Ordering::Equal,

            SortField::Name(Sensitive)    => natord::compare(&a.name, &b.name),
            SortField::Name(Insensitive)  => natord::compare_ignore_case(&a.name, &b.name),

            SortField::Size          => a.metadata.len().cmp(&b.metadata.len()),
            SortField::FileInode     => a.metadata.ino().cmp(&b.metadata.ino()),
            SortField::ModifiedDate  => a.metadata.mtime().cmp(&b.metadata.mtime()),
            SortField::AccessedDate  => a.metadata.atime().cmp(&b.metadata.atime()),
            SortField::CreatedDate   => a.metadata.ctime().cmp(&b.metadata.ctime()),

            SortField::Extension(Sensitive) => match a.ext.cmp(&b.ext) {
                Ordering::Equal  => natord::compare(&*a.name, &*b.name),
                order            => order,
            },

            SortField::Extension(Insensitive) => match a.ext.cmp(&b.ext) {
                Ordering::Equal  => natord::compare_ignore_case(&*a.name, &*b.name),
                order            => order,
            },
        }
    }
}


/// User-supplied field to sort by.
///
/// The variants that compare by name carry a `SortCase` saying whether the
/// comparison is case-sensitive.
#[derive(PartialEq, Debug, Copy, Clone)]
pub enum SortField {

    /// Don't apply any sorting. This is usually used as an optimisation in
    /// scripts, where the order doesn't matter.
    Unsorted,

    /// The file name. This is the default sorting.
    Name(SortCase),

    /// The file's extension, with extensionless files being listed first.
    /// Files with the same extension are then ordered by name.
    Extension(SortCase),

    /// The file's size.
    Size,

    /// The file's inode. This is sometimes analogous to the order in which
    /// the files were created on the hard drive.
    FileInode,

    /// The time at which this file was modified (the `mtime`).
    ///
    /// As this is stored as a Unix timestamp, rather than a local time
    /// instance, the time zone does not matter and will only be used to
    /// display the timestamps, not compare them.
    ModifiedDate,

    /// The time at which this file was accessed (the `atime`).
    ///
    /// Oddly enough, this field rarely holds the *actual* accessed time.
    /// Recording a read time means writing to the file each time it’s read,
    /// which slows the whole operation down, so many systems will only
    /// update the timestamp in certain circumstances. This has become common
    /// enough that it’s now expected behaviour for the `atime` field.
    /// <http://unix.stackexchange.com/a/8842>
    AccessedDate,

    /// The time at which this file was changed or created (the `ctime`).
    ///
    /// Contrary to the name, this field is used to mark the time when a
    /// file's metadata changed -- its permissions, owners, or link count.
    ///
    /// In original Unix, this was, however, meant as creation time.
    /// <https://www.bell-labs.com/usr/dmr/www/cacm.html>
    CreatedDate,
}

/// Whether a field should be sorted case-sensitively or case-insensitively.
///
/// This determines which of the `natord` functions to use: `compare` for
/// the sensitive case, and `compare_ignore_case` for the insensitive one.
#[derive(PartialEq, Debug, Copy, Clone)]
pub enum SortCase {

    /// Sort files case-sensitively with uppercase first, with ‘A’ coming
    /// before ‘a’.
    Sensitive,

    /// Sort files case-insensitively, with ‘A’ being equal to ‘a’.
    Insensitive,
}

impl Default for SortField {
    /// The sort field used when none is requested: by file name,
    /// case-sensitively.
    fn default() -> Self {
        SortField::Name(SortCase::Sensitive)
    }
}

impl SortField {

    /// Determine the sort field to use, based on the presence of a “sort”
    /// argument. This will return `Err` if the option is there, but does not
    /// correspond to a valid field.
    fn deduce(matches: &getopts::Matches) -> Result<SortField, Misfire> {
230 231 232 233 234

        const SORTS: &[&str] = &[ "name", "Name", "size", "extension",
                                  "Extension", "modified", "accessed",
                                  "created", "inode", "none" ];

B
Benjamin Sago 已提交
235 236 237 238 239 240 241 242 243 244 245 246
        if let Some(word) = matches.opt_str("sort") {
            match &*word {
                "name" | "filename"   => Ok(SortField::Name(SortCase::Sensitive)),
                "Name" | "Filename"   => Ok(SortField::Name(SortCase::Insensitive)),
                "size" | "filesize"   => Ok(SortField::Size),
                "ext"  | "extension"  => Ok(SortField::Extension(SortCase::Sensitive)),
                "Ext"  | "Extension"  => Ok(SortField::Extension(SortCase::Insensitive)),
                "mod"  | "modified"   => Ok(SortField::ModifiedDate),
                "acc"  | "accessed"   => Ok(SortField::AccessedDate),
                "cr"   | "created"    => Ok(SortField::CreatedDate),
                "none"                => Ok(SortField::Unsorted),
                "inode"               => Ok(SortField::FileInode),
247
                field                 => Err(Misfire::bad_argument("sort", field, SORTS))
B
Benjamin Sago 已提交
248 249 250 251 252 253 254
            }
        }
        else {
            Ok(SortField::default())
        }
    }
}
B
Ben S 已提交
255 256


/// Which of the hidden “dot” entries to show in a listing.
///
/// Files in Unix are usually hidden by giving them a name with a leading
/// dot, but two entries in particular are "extra-hidden": `.` and `..`,
/// which only become visible after an extra `-a` option.
#[derive(PartialEq, Debug, Copy, Clone)]
pub enum DotFilter {

    /// Shows files, dotfiles, and `.` and `..`.
    ShowDotfilesAndDots,

    /// Show files and dotfiles, but hide `.` and `..`.
    ShowDotfiles,

    /// Just show files, hiding anything beginning with a dot.
    JustFiles,
}

impl Default for DotFilter {
    /// The default is to hide everything that begins with a dot.
    fn default() -> Self {
        DotFilter::JustFiles
    }
}


impl DotFilter {
    pub fn deduce(matches: &getopts::Matches) -> DotFilter {
282 283 284 285 286
        match matches.opt_count("all") {
            0 => DotFilter::JustFiles,
            1 => DotFilter::ShowDotfiles,
            _ => DotFilter::ShowDotfilesAndDots,
        }
287 288 289 290
    }
}


B
Ben S 已提交
291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
#[derive(PartialEq, Default, Debug, Clone)]
struct IgnorePatterns {
    patterns: Vec<glob::Pattern>,
}

impl IgnorePatterns {

    /// Determines the set of patterns to ignore, based on the user’s
    /// “ignore-glob” command-line argument.
    ///
    /// The argument's value is split on `|`, and every piece is compiled as
    /// its own glob pattern. Without the argument, the set is empty.
    ///
    /// # Errors
    ///
    /// Fails with the first pattern error, converted into a `Misfire`, if
    /// any piece is not a valid glob.
    pub fn deduce(matches: &getopts::Matches) -> Result<IgnorePatterns, Misfire> {
        let patterns = match matches.opt_str("ignore-glob") {
            None        => Vec::new(),
            Some(globs) => globs.split('|')
                                .map(glob::Pattern::new)
                                .collect::<Result<Vec<_>, _>>()?,
        };

        Ok(IgnorePatterns {
            patterns: patterns,
        })
    }

    /// Whether the given file's name matches at least one of the patterns.
    fn is_ignored(&self, file: &File) -> bool {
        self.patterns.iter().any(|p| p.matches(&file.name))
    }
}