mod.rs 20.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

11 12 13 14 15 16 17 18 19 20 21 22
/*!

Cross-platform path support

This module implements support for two flavors of paths. `PosixPath` represents
a path on any unix-like system, whereas `WindowsPath` represents a path on
Windows. This module also exposes a typedef `Path` which is equal to the
appropriate platform-specific path variant.

Both `PosixPath` and `WindowsPath` implement a trait `GenericPath`, which
contains the set of methods that behave the same for both paths. They each also
implement some methods that could not be expressed in `GenericPath`, yet behave
identically for both path flavors, such as `.components()`.

The three main design goals of this module are 1) to avoid unnecessary
allocation, 2) to behave the same regardless of which flavor of path is being
used, and 3) to support paths that cannot be represented in UTF-8 (as Linux has
no restriction on paths beyond disallowing NUL).

## Usage

Usage of this module is fairly straightforward. Unless writing platform-specific
code, `Path` should be used to refer to the platform-native path.

Creation of a path is typically done with either `Path::new(some_str)` or
`Path::new(some_vec)`. This path can be modified with `.push()` and
`.pop()` (and other setters). The resulting Path can either be passed to another
API that expects a path, or can be turned into a `&[u8]` with `.as_vec()` or an
`Option<&str>` with `.as_str()`. Similarly, attributes of the path can be queried
with methods such as `.filename()`. There are also methods that return a new
path instead of modifying the receiver, such as `.join()` or `.dir_path()`.

Paths are always kept in normalized form. This means that creating the path
`Path::new("a/b/../c")` will return the path `a/c`. Similarly any attempt
to mutate the path will always leave it in normalized form.

When rendering a path to some form of output, there is a method `.display()`
which is compatible with the `format!()` parameter `{}`. This will render the
path as a string, replacing all non-utf8 sequences with the Replacement
Character (U+FFFD). As such it is not suitable for passing to any API that
actually operates on the path; it is only intended for display.

## Example

```rust
let mut path = Path::new("/tmp/path");
println!("path: {}", path.display());
path.set_filename("foo");
path.push("bar");
println!("new path: {}", path.display());
println!("path exists: {}", path.exists());
```

*/
65 66 67 68

use container::Container;
use c_str::CString;
use clone::Clone;
69
use fmt;
70
use iter::Iterator;
71 72
use option::{Option, None, Some};
use str;
73 74
use str::{MaybeOwned, Str, StrSlice, from_utf8_lossy};
use slice::{OwnedCloneableVector, OwnedVector, Vector};
D
Daniel Micay 已提交
75
use slice::{ImmutableEqVector, ImmutableVector};
76
use vec::Vec;
77 78 79

/// Typedef for POSIX file paths.
/// See `posix::Path` for more info.
80
pub use PosixPath = self::posix::Path;
81

K
Kevin Ballard 已提交
82 83
/// Typedef for Windows file paths.
/// See `windows::Path` for more info.
84
pub use WindowsPath = self::windows::Path;
85 86 87

/// Typedef for the platform-native path type
#[cfg(unix)]
88
pub use Path = self::posix::Path;
K
Kevin Ballard 已提交
89 90
/// Typedef for the platform-native path type
#[cfg(windows)]
91
pub use Path = self::windows::Path;
92 93 94

/// Typedef for the platform-native component iterator
#[cfg(unix)]
P
Palmer Cox 已提交
95
pub use Components = self::posix::Components;
96 97
/// Typedef for the platform-native reverse component iterator
#[cfg(unix)]
P
Palmer Cox 已提交
98
pub use RevComponents = self::posix::RevComponents;
99 100
/// Typedef for the platform-native component iterator
#[cfg(windows)]
P
Palmer Cox 已提交
101
pub use Components = self::windows::Components;
102 103
/// Typedef for the platform-native reverse component iterator
#[cfg(windows)]
P
Palmer Cox 已提交
104
pub use RevComponents = self::windows::RevComponents;
105 106 107

/// Typedef for the platform-native str component iterator
#[cfg(unix)]
P
Palmer Cox 已提交
108
pub use StrComponents = self::posix::StrComponents;
109 110
/// Typedef for the platform-native reverse str component iterator
#[cfg(unix)]
P
Palmer Cox 已提交
111
pub use RevStrComponents = self::posix::RevStrComponents;
112 113
/// Typedef for the platform-native str component iterator
#[cfg(windows)]
P
Palmer Cox 已提交
114
pub use StrComponents = self::windows::StrComponents;
115 116
/// Typedef for the platform-native reverse str component iterator
#[cfg(windows)]
P
Palmer Cox 已提交
117
pub use RevStrComponents = self::windows::RevStrComponents;
118

119 120 121
/// Alias for the platform-native separator character.
#[cfg(unix)]
pub use SEP = self::posix::SEP;
122
/// Alias for the platform-native separator character.
123 124 125
#[cfg(windows)]
pub use SEP = self::windows::SEP;

126
/// Alias for the platform-native separator byte.
127 128 129 130 131 132
#[cfg(unix)]
pub use SEP_BYTE = self::posix::SEP_BYTE;
/// Alias for the platform-native separator byte.
#[cfg(windows)]
pub use SEP_BYTE = self::windows::SEP_BYTE;

133 134 135 136 137 138 139 140 141 142 143 144 145
/// Typedef for the platform-native separator char func
#[cfg(unix)]
pub use is_sep = self::posix::is_sep;
/// Typedef for the platform-native separator char func
#[cfg(windows)]
pub use is_sep = self::windows::is_sep;
/// Typedef for the platform-native separator byte func
#[cfg(unix)]
pub use is_sep_byte = self::posix::is_sep_byte;
/// Typedef for the platform-native separator byte func
#[cfg(windows)]
pub use is_sep_byte = self::windows::is_sep_byte;

146 147
pub mod posix;
pub mod windows;
148 149 150

/// A trait that represents the generic operations available on paths
pub trait GenericPath: Clone + GenericPathUnsafe {
151
    /// Creates a new Path from a byte vector or string.
152 153 154 155
    /// The resulting Path will always be normalized.
    ///
    /// # Failure
    ///
A
Alex Crichton 已提交
156
    /// Fails the task if the path contains a NUL.
K
Kevin Ballard 已提交
157 158
    ///
    /// See individual Path impls for additional restrictions.
159
    #[inline]
160
    fn new<T: BytesContainer>(path: T) -> Self {
161
        assert!(!contains_nul(&path));
A
Alex Crichton 已提交
162
        unsafe { GenericPathUnsafe::new_unchecked(path) }
163 164
    }

165
    /// Creates a new Path from a byte vector or string, if possible.
166 167
    /// The resulting Path will always be normalized.
    #[inline]
168
    fn new_opt<T: BytesContainer>(path: T) -> Option<Self> {
169
        if contains_nul(&path) {
170 171
            None
        } else {
172
            Some(unsafe { GenericPathUnsafe::new_unchecked(path) })
173 174 175
        }
    }

176 177 178 179
    /// Returns the path as a string, if possible.
    /// If the path is not representable in utf-8, this returns None.
    #[inline]
    fn as_str<'a>(&'a self) -> Option<&'a str> {
180
        str::from_utf8(self.as_vec())
181 182 183 184 185
    }

    /// Returns the path as a byte vector
    fn as_vec<'a>(&'a self) -> &'a [u8];

186
    /// Converts the Path into an owned byte vector
187
    fn into_vec(self) -> Vec<u8>;
188

189
    /// Returns an object that implements `Show` for printing paths
190 191 192
    ///
    /// This will print the equivalent of `to_display_str()` when used with a {} format parameter.
    fn display<'a>(&'a self) -> Display<'a, Self> {
193
        Display{ path: self, filename: false }
194 195
    }

196
    /// Returns an object that implements `Show` for printing filenames
197 198 199
    ///
    /// This will print the equivalent of `to_filename_display_str()` when used with a {}
    /// format parameter. If there is no filename, nothing will be printed.
200 201
    fn filename_display<'a>(&'a self) -> Display<'a, Self> {
        Display{ path: self, filename: true }
202 203
    }

204 205 206 207 208 209 210
    /// Returns the directory component of `self`, as a byte vector (with no trailing separator).
    /// If `self` has no directory component, returns ['.'].
    fn dirname<'a>(&'a self) -> &'a [u8];
    /// Returns the directory component of `self`, as a string, if possible.
    /// See `dirname` for details.
    #[inline]
    fn dirname_str<'a>(&'a self) -> Option<&'a str> {
211
        str::from_utf8(self.dirname())
212 213
    }
    /// Returns the file component of `self`, as a byte vector.
214 215 216
    /// If `self` represents the root of the file hierarchy, returns None.
    /// If `self` is "." or "..", returns None.
    fn filename<'a>(&'a self) -> Option<&'a [u8]>;
217 218 219 220
    /// Returns the file component of `self`, as a string, if possible.
    /// See `filename` for details.
    #[inline]
    fn filename_str<'a>(&'a self) -> Option<&'a str> {
221
        self.filename().and_then(str::from_utf8)
222 223 224 225
    }
    /// Returns the stem of the filename of `self`, as a byte vector.
    /// The stem is the portion of the filename just before the last '.'.
    /// If there is no '.', the entire filename is returned.
226 227 228 229 230 231 232 233 234 235 236
    fn filestem<'a>(&'a self) -> Option<&'a [u8]> {
        match self.filename() {
            None => None,
            Some(name) => Some({
                let dot = '.' as u8;
                match name.rposition_elem(&dot) {
                    None | Some(0) => name,
                    Some(1) if name == bytes!("..") => name,
                    Some(pos) => name.slice_to(pos)
                }
            })
237 238 239 240 241 242
        }
    }
    /// Returns the stem of the filename of `self`, as a string, if possible.
    /// See `filestem` for details.
    #[inline]
    fn filestem_str<'a>(&'a self) -> Option<&'a str> {
243
        self.filestem().and_then(str::from_utf8)
244 245 246 247 248 249
    }
    /// Returns the extension of the filename of `self`, as an optional byte vector.
    /// The extension is the portion of the filename just after the last '.'.
    /// If there is no extension, None is returned.
    /// If the filename ends in '.', the empty vector is returned.
    fn extension<'a>(&'a self) -> Option<&'a [u8]> {
250 251 252 253 254 255 256 257 258 259
        match self.filename() {
            None => None,
            Some(name) => {
                let dot = '.' as u8;
                match name.rposition_elem(&dot) {
                    None | Some(0) => None,
                    Some(1) if name == bytes!("..") => None,
                    Some(pos) => Some(name.slice_from(pos+1))
                }
            }
260 261 262 263 264 265
        }
    }
    /// Returns the extension of the filename of `self`, as a string, if possible.
    /// See `extension` for details.
    #[inline]
    fn extension_str<'a>(&'a self) -> Option<&'a str> {
266
        self.extension().and_then(str::from_utf8)
267 268
    }

269
    /// Replaces the filename portion of the path with the given byte vector or string.
270 271 272 273
    /// If the replacement name is [], this is equivalent to popping the path.
    ///
    /// # Failure
    ///
A
Alex Crichton 已提交
274
    /// Fails the task if the filename contains a NUL.
275
    #[inline]
276
    fn set_filename<T: BytesContainer>(&mut self, filename: T) {
277
        assert!(!contains_nul(&filename));
A
Alex Crichton 已提交
278
        unsafe { self.set_filename_unchecked(filename) }
279
    }
280
    /// Replaces the extension with the given byte vector or string.
281
    /// If there is no extension in `self`, this adds one.
282
    /// If the argument is [] or "", this removes the extension.
283 284 285 286
    /// If `self` has no filename, this is a no-op.
    ///
    /// # Failure
    ///
A
Alex Crichton 已提交
287
    /// Fails the task if the extension contains a NUL.
288
    fn set_extension<T: BytesContainer>(&mut self, extension: T) {
289
        assert!(!contains_nul(&extension));
290 291 292 293 294 295

        let val = self.filename().and_then(|name| {
            let dot = '.' as u8;
            let extlen = extension.container_as_bytes().len();
            match (name.rposition_elem(&dot), extlen) {
                (None, 0) | (Some(0), 0) => None,
296
                (Some(idx), 0) => Some(Vec::from_slice(name.slice_to(idx))),
297 298 299 300 301 302 303
                (idx, extlen) => {
                    let idx = match idx {
                        None | Some(0) => name.len(),
                        Some(val) => val
                    };

                    let mut v;
304
                    v = Vec::with_capacity(idx + extlen + 1);
305 306 307 308
                    v.push_all(name.slice_to(idx));
                    v.push(dot);
                    v.push_all(extension.container_as_bytes());
                    Some(v)
309
                }
310
            }
311 312
        });

313 314 315 316 317 318
        match val {
            None => (),
            Some(v) => unsafe { self.set_filename_unchecked(v) }
        }
    }

319 320
    /// Returns a new Path constructed by replacing the filename with the given
    /// byte vector or string.
321 322 323 324
    /// See `set_filename` for details.
    ///
    /// # Failure
    ///
A
Alex Crichton 已提交
325
    /// Fails the task if the filename contains a NUL.
326
    #[inline]
327
    fn with_filename<T: BytesContainer>(&self, filename: T) -> Self {
328 329 330 331
        let mut p = self.clone();
        p.set_filename(filename);
        p
    }
332 333
    /// Returns a new Path constructed by setting the extension to the given
    /// byte vector or string.
334 335 336 337
    /// See `set_extension` for details.
    ///
    /// # Failure
    ///
A
Alex Crichton 已提交
338
    /// Fails the task if the extension contains a NUL.
339
    #[inline]
340
    fn with_extension<T: BytesContainer>(&self, extension: T) -> Self {
341 342 343 344 345 346 347 348
        let mut p = self.clone();
        p.set_extension(extension);
        p
    }

    /// Returns the directory component of `self`, as a Path.
    /// If `self` represents the root of the filesystem hierarchy, returns `self`.
    fn dir_path(&self) -> Self {
K
Kevin Ballard 已提交
349
        // self.dirname() returns a NUL-free vector
350
        unsafe { GenericPathUnsafe::new_unchecked(self.dirname()) }
351 352
    }

353 354
    /// Returns a Path that represents the filesystem root that `self` is rooted in.
    ///
355
    /// If `self` is not absolute, or vol/cwd-relative in the case of Windows, this returns None.
356 357
    fn root_path(&self) -> Option<Self>;

358
    /// Pushes a path (as a byte vector or string) onto `self`.
359 360 361 362
    /// If the argument represents an absolute path, it replaces `self`.
    ///
    /// # Failure
    ///
A
Alex Crichton 已提交
363
    /// Fails the task if the path contains a NUL.
364
    #[inline]
365
    fn push<T: BytesContainer>(&mut self, path: T) {
366
        assert!(!contains_nul(&path));
A
Alex Crichton 已提交
367
        unsafe { self.push_unchecked(path) }
368
    }
369
    /// Pushes multiple paths (as byte vectors or strings) onto `self`.
370 371
    /// See `push` for details.
    #[inline]
372 373 374 375
    fn push_many<T: BytesContainer>(&mut self, paths: &[T]) {
        let t: Option<T> = None;
        if BytesContainer::is_str(t) {
            for p in paths.iter() {
376
                self.push(p.container_as_str().unwrap())
377 378 379 380 381
            }
        } else {
            for p in paths.iter() {
                self.push(p.container_as_bytes())
            }
382 383
        }
    }
K
Kevin Ballard 已提交
384 385 386 387
    /// Removes the last path component from the receiver.
    /// Returns `true` if the receiver was modified, or `false` if it already
    /// represented the root of the file hierarchy.
    fn pop(&mut self) -> bool;
388

389 390
    /// Returns a new Path constructed by joining `self` with the given path
    /// (as a byte vector or string).
391 392 393 394
    /// If the given path is absolute, the new Path will represent just that.
    ///
    /// # Failure
    ///
A
Alex Crichton 已提交
395
    /// Fails the task if the path contains a NUL.
396
    #[inline]
397
    fn join<T: BytesContainer>(&self, path: T) -> Self {
398 399 400 401
        let mut p = self.clone();
        p.push(path);
        p
    }
402 403
    /// Returns a new Path constructed by joining `self` with the given paths
    /// (as byte vectors or strings).
404 405
    /// See `join` for details.
    #[inline]
406
    fn join_many<T: BytesContainer>(&self, paths: &[T]) -> Self {
407 408 409 410
        let mut p = self.clone();
        p.push_many(paths);
        p
    }
411 412

    /// Returns whether `self` represents an absolute path.
K
Kevin Ballard 已提交
413 414
    /// An absolute path is defined as one that, when joined to another path, will
    /// yield back the same absolute path.
415 416
    fn is_absolute(&self) -> bool;

417 418 419 420 421 422 423 424
    /// Returns whether `self` represents a relative path.
    /// Typically this is the inverse of `is_absolute`.
    /// But for Windows paths, it also means the path is not volume-relative or
    /// relative to the current working directory.
    fn is_relative(&self) -> bool {
        !self.is_absolute()
    }

425 426 427 428 429 430 431 432 433 434
    /// Returns whether `self` is equal to, or is an ancestor of, the given path.
    /// If both paths are relative, they are compared as though they are relative
    /// to the same parent path.
    fn is_ancestor_of(&self, other: &Self) -> bool;

    /// Returns the Path that, were it joined to `base`, would yield `self`.
    /// If no such path exists, None is returned.
    /// If `self` is absolute and `base` is relative, or on Windows if both
    /// paths refer to separate drives, an absolute path is returned.
    fn path_relative_from(&self, base: &Self) -> Option<Self>;
435

436 437 438 439 440 441 442 443
    /// Returns whether the relative path `child` is a suffix of `self`.
    fn ends_with_path(&self, child: &Self) -> bool;
}

/// A trait that represents something bytes-like (e.g. a &[u8] or a &str)
pub trait BytesContainer {
    /// Returns a &[u8] representing the receiver
    fn container_as_bytes<'a>(&'a self) -> &'a [u8];
444
    /// Consumes the receiver and converts it into Vec<u8>
445
    #[inline]
446 447
    fn container_into_owned_bytes(self) -> Vec<u8> {
        Vec::from_slice(self.container_as_bytes())
448 449 450
    }
    /// Returns the receiver interpreted as a utf-8 string, if possible
    #[inline]
451
    fn container_as_str<'a>(&'a self) -> Option<&'a str> {
452
        str::from_utf8(self.container_as_bytes())
453
    }
454
    /// Returns whether .container_as_str() is guaranteed to not fail
455 456 457
    // FIXME (#8888): Remove unused arg once ::<for T> works
    #[inline]
    fn is_str(_: Option<Self>) -> bool { false }
458 459 460 461
}

/// A trait that represents the unsafe operations on GenericPaths
pub trait GenericPathUnsafe {
462
    /// Creates a new Path without checking for null bytes.
463
    /// The resulting Path will always be normalized.
464
    unsafe fn new_unchecked<T: BytesContainer>(path: T) -> Self;
465

466 467
    /// Replaces the filename portion of the path without checking for null
    /// bytes.
468
    /// See `set_filename` for details.
469
    unsafe fn set_filename_unchecked<T: BytesContainer>(&mut self, filename: T);
K
Kevin Ballard 已提交
470

471
    /// Pushes a path onto `self` without checking for null bytes.
472
    /// See `push` for details.
473
    unsafe fn push_unchecked<T: BytesContainer>(&mut self, path: T);
474 475
}

476
/// Helper struct for printing paths with format!()
///
/// Created by `GenericPath::display()` and `GenericPath::filename_display()`.
pub struct Display<'a, P> {
    // The path being rendered.
    path: &'a P,
    // When true only the filename component is rendered, otherwise the whole path.
    filename: bool
}

482
impl<'a, P: GenericPath> fmt::Show for Display<'a, P> {
    /// Formats the (lossily decoded) path or filename as a plain string.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.as_maybe_owned();
        rendered.as_slice().fmt(f)
    }
}

E
Erik Price 已提交
488
impl<'a, P: GenericPath> Display<'a, P> {
489
    /// Returns the path as a possibly-owned string.
490 491 492 493
    ///
    /// If the path is not UTF-8, invalid sequences will be replaced with the
    /// unicode replacement char. This involves allocation.
    #[inline]
494 495 496 497 498
    pub fn as_maybe_owned(&self) -> MaybeOwned<'a> {
        from_utf8_lossy(if self.filename {
            match self.path.filename() {
                None => &[],
                Some(v) => v
499
            }
500 501 502
        } else {
            self.path.as_vec()
        })
503 504 505
    }
}

E
Erik Price 已提交
506
impl<'a> BytesContainer for &'a str {
507 508 509 510 511
    #[inline]
    fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
        self.as_bytes()
    }
    #[inline]
512
    fn container_as_str<'a>(&'a self) -> Option<&'a str> {
513 514 515
        Some(*self)
    }
    #[inline]
E
Erik Price 已提交
516
    fn is_str(_: Option<&'a str>) -> bool { true }
517 518 519 520 521 522 523 524
}

impl BytesContainer for ~str {
    /// Borrows the string's utf-8 bytes.
    #[inline]
    fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
        self.as_slice().as_bytes()
    }
    /// Always succeeds: the receiver already is a string.
    #[inline]
    fn container_as_str<'a>(&'a self) -> Option<&'a str> {
        let text: &'a str = self.as_slice();
        Some(text)
    }
    /// A `~str` is always a valid string.
    #[inline]
    fn is_str(_: Option<~str>) -> bool { true }
}

E
Erik Price 已提交
532
impl<'a> BytesContainer for &'a [u8] {
533 534 535 536 537 538 539 540 541 542 543
    #[inline]
    fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
        *self
    }
}

impl BytesContainer for ~[u8] {
    /// Borrows the owned vector as a slice.
    #[inline]
    fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
        let bytes: &'a [u8] = self.as_slice();
        bytes
    }
}

impl BytesContainer for Vec<u8> {
    #[inline]
    fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
        self.as_slice()
    }
551
    #[inline]
552
    fn container_into_owned_bytes(self) -> Vec<u8> {
553 554 555 556
        self
    }
}

557 558 559
impl BytesContainer for CString {
    /// Borrows the C string's bytes via `as_bytes_no_nul()`.
    #[inline]
    fn container_as_bytes<'a>(&'a self) -> &'a [u8] {
        let bytes: &'a [u8] = self.as_bytes_no_nul();
        bytes
    }
}

564 565 566 567 568 569 570 571 572 573 574 575 576
impl<'a> BytesContainer for str::MaybeOwned<'a> {
    #[inline]
    fn container_as_bytes<'b>(&'b self) -> &'b [u8] {
        self.as_slice().as_bytes()
    }
    #[inline]
    fn container_as_str<'b>(&'b self) -> Option<&'b str> {
        Some(self.as_slice())
    }
    #[inline]
    fn is_str(_: Option<str::MaybeOwned>) -> bool { true }
}

577
#[inline(always)]
578 579
fn contains_nul<T: BytesContainer>(v: &T) -> bool {
    v.container_as_bytes().iter().any(|&x| x == 0)
580
}
K
Kevin Ballard 已提交
581

582 583
#[cfg(test)]
mod tests {
    use prelude::*;
    use super::{GenericPath, PosixPath, WindowsPath};
    use c_str::ToCStr;

    // A CString must be usable as the source of a path for both flavors.
    #[test]
    fn test_cstring() {
        let posix_input = "/foo/bar/baz";
        let posix_path: PosixPath = PosixPath::new(posix_input.to_c_str());
        assert_eq!(posix_path.as_vec(), posix_input.as_bytes());

        let windows_input = r"\foo\bar\baz";
        let windows_path: WindowsPath = WindowsPath::new(windows_input.to_c_str());
        assert_eq!(windows_path.as_str().unwrap(), windows_input.as_slice());
    }
}