string.rs 44.6 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
10 11
//
// ignore-lexer-test FIXME #15679
12 13 14

//! An owned, growable string that enforces that its contents are valid UTF-8.

15 16
#![stable]

17 18
use core::prelude::*;

19
use core::borrow::{Cow, IntoCow};
20
use core::cmp::Equiv;
21 22
use core::default::Default;
use core::fmt;
23
use core::hash;
24
use core::iter::FromIterator;
25
use core::mem;
26
use core::ops::{mod, Deref, Add};
27
use core::ptr;
28
use core::raw::Slice as RawSlice;
A
Alex Crichton 已提交
29 30
use unicode::str as unicode_str;
use unicode::str::Utf16Item;
31

A
Alex Crichton 已提交
32
use str::{mod, CharRange, FromStr, Utf8Error};
33
use vec::{DerefVec, Vec, as_vec};
34

35
/// A growable string stored as a UTF-8 encoded buffer.
J
Jorge Aparicio 已提交
36
#[deriving(Clone, PartialOrd, Eq, Ord)]
A
Alex Crichton 已提交
37
#[stable]
38
pub struct String {
    // Backing byte buffer. The rest of this module treats its contents as
    // UTF-8; the only ways in without validation are the `unsafe` functions.
    vec: Vec<u8>,
}

42 43 44 45 46 47 48 49 50 51 52 53
/// A possible error value from the `String::from_utf8` function.
///
/// Holds both the byte vector that was rejected and the `Utf8Error` that
/// describes why, so the caller can recover the original allocation.
#[stable]
pub struct FromUtf8Error {
    // The vector that was handed to `String::from_utf8`, returned unchanged.
    bytes: Vec<u8>,
    // The error reported by `str::from_utf8` when validating `bytes`.
    error: Utf8Error,
}

/// A possible error value from the `String::from_utf16` function.
///
/// `from_utf16` produces this error when it encounters a lone surrogate; the
/// type carries no further detail.
#[stable]
#[allow(missing_copy_implementations)]
// Wraps a unit value only, so the error has no payload to inspect.
pub struct FromUtf16Error(());

54
impl String {
J
Joseph Crail 已提交
55
    /// Creates a new string buffer initialized with the empty string.
J
Jonas Hietala 已提交
56
    ///
57
    /// # Examples
J
Jonas Hietala 已提交
58 59 60 61
    ///
    /// ```
    /// let mut s = String::new();
    /// ```
62
    #[inline]
A
Alex Crichton 已提交
63
    #[stable]
64 65
    pub fn new() -> String {
        String {
66 67 68 69 70
            vec: Vec::new(),
        }
    }

    /// Creates a new string buffer with the given capacity.
J
Jonas Hietala 已提交
71 72 73
    /// The string will be able to hold exactly `capacity` bytes without
    /// reallocating. If `capacity` is 0, the string will not allocate.
    ///
74
    /// # Examples
J
Jonas Hietala 已提交
75 76 77 78
    ///
    /// ```
    /// let mut s = String::with_capacity(10);
    /// ```
79
    #[inline]
A
Alex Crichton 已提交
80
    #[stable]
81 82
    pub fn with_capacity(capacity: uint) -> String {
        String {
83 84 85 86 87
            vec: Vec::with_capacity(capacity),
        }
    }

    /// Creates a new string buffer from the given string.
J
Jonas Hietala 已提交
88
    ///
89
    /// # Examples
J
Jonas Hietala 已提交
90 91 92 93 94
    ///
    /// ```
    /// let s = String::from_str("hello");
    /// assert_eq!(s.as_slice(), "hello");
    /// ```
95
    #[inline]
A
Alex Crichton 已提交
96
    #[experimental = "needs investigation to see if to_string() can match perf"]
97
    pub fn from_str(string: &str) -> String {
98
        String { vec: ::slice::SliceExt::to_vec(string.as_bytes()) }
99 100
    }

101 102 103
    /// Returns the vector as a string buffer, if possible, taking care not to
    /// copy it.
    ///
A
Alex Crichton 已提交
104 105 106 107
    /// # Failure
    ///
    /// If the given vector is not valid UTF-8, then the original vector and the
    /// corresponding error are returned.
108
    ///
109
    /// # Examples
110 111
    ///
    /// ```rust
A
Alex Crichton 已提交
112 113 114
    /// # #![allow(deprecated)]
    /// use std::str::Utf8Error;
    ///
115
    /// let hello_vec = vec![104, 101, 108, 108, 111];
116 117
    /// let s = String::from_utf8(hello_vec).unwrap();
    /// assert_eq!(s, "hello");
J
Jonas Hietala 已提交
118 119
    ///
    /// let invalid_vec = vec![240, 144, 128];
120 121 122
    /// let s = String::from_utf8(invalid_vec).err().unwrap();
    /// assert_eq!(s.utf8_error(), Utf8Error::TooShort);
    /// assert_eq!(s.into_bytes(), vec![240, 144, 128]);
123
    /// ```
124
    #[inline]
125 126
    #[stable]
    pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
A
Alex Crichton 已提交
127 128
        match str::from_utf8(vec.as_slice()) {
            Ok(..) => Ok(String { vec: vec }),
129
            Err(e) => Err(FromUtf8Error { bytes: vec, error: e })
130 131
        }
    }
132

P
P1start 已提交
133 134
    /// Converts a vector of bytes to a new UTF-8 string.
    /// Any invalid UTF-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
135
    ///
136
    /// # Examples
137 138 139
    ///
    /// ```rust
    /// let input = b"Hello \xF0\x90\x80World";
A
Adolfo Ochagavía 已提交
140
    /// let output = String::from_utf8_lossy(input);
A
Alex Crichton 已提交
141
    /// assert_eq!(output.as_slice(), "Hello \u{FFFD}World");
142
    /// ```
143
    #[stable]
144
    pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
145
        let mut i = 0;
A
Alex Crichton 已提交
146 147
        match str::from_utf8(v) {
            Ok(s) => return Cow::Borrowed(s),
148 149 150 151 152
            Err(e) => {
                if let Utf8Error::InvalidByte(firstbad) = e {
                    i = firstbad;
                }
            }
153 154 155 156 157 158
        }

        static TAG_CONT_U8: u8 = 128u8;
        static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
        let total = v.len();
        fn unsafe_get(xs: &[u8], i: uint) -> u8 {
A
Aaron Turon 已提交
159
            unsafe { *xs.get_unchecked(i) }
160 161 162 163 164 165 166 167 168 169 170 171 172
        }
        fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
            if i >= total {
                0
            } else {
                unsafe_get(xs, i)
            }
        }

        let mut res = String::with_capacity(total);

        if i > 0 {
            unsafe {
173
                res.as_mut_vec().push_all(v[..i])
174 175 176 177 178 179
            };
        }

        // subseqidx is the index of the first byte of the subsequence we're looking at.
        // It's used to copy a bunch of contiguous good codepoints at once instead of copying
        // them one by one.
180
        let mut subseqidx = i;
181 182 183 184 185 186 187 188 189

        while i < total {
            let i_ = i;
            let byte = unsafe_get(v, i);
            i += 1;

            macro_rules! error(() => ({
                unsafe {
                    if subseqidx != i_ {
190
                        res.as_mut_vec().push_all(v[subseqidx..i_]);
191 192
                    }
                    subseqidx = i;
193
                    res.as_mut_vec().push_all(REPLACEMENT);
194
                }
195
            }));
196 197 198 199

            if byte < 128u8 {
                // subseqidx handles this
            } else {
A
Alex Crichton 已提交
200
                let w = unicode_str::utf8_char_width(byte);
201 202 203 204 205 206 207 208 209 210 211

                match w {
                    2 => {
                        if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
                            error!();
                            continue;
                        }
                        i += 1;
                    }
                    3 => {
                        match (byte, safe_get(v, i, total)) {
212 213 214 215
                            (0xE0         , 0xA0 ... 0xBF) => (),
                            (0xE1 ... 0xEC, 0x80 ... 0xBF) => (),
                            (0xED         , 0x80 ... 0x9F) => (),
                            (0xEE ... 0xEF, 0x80 ... 0xBF) => (),
216 217 218 219 220 221 222 223 224 225 226 227 228 229
                            _ => {
                                error!();
                                continue;
                            }
                        }
                        i += 1;
                        if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
                            error!();
                            continue;
                        }
                        i += 1;
                    }
                    4 => {
                        match (byte, safe_get(v, i, total)) {
230 231 232
                            (0xF0         , 0x90 ... 0xBF) => (),
                            (0xF1 ... 0xF3, 0x80 ... 0xBF) => (),
                            (0xF4         , 0x80 ... 0x8F) => (),
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
                            _ => {
                                error!();
                                continue;
                            }
                        }
                        i += 1;
                        if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
                            error!();
                            continue;
                        }
                        i += 1;
                        if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
                            error!();
                            continue;
                        }
                        i += 1;
                    }
                    _ => {
                        error!();
                        continue;
                    }
                }
            }
        }
        if subseqidx < total {
            unsafe {
259
                res.as_mut_vec().push_all(v[subseqidx..total])
260 261
            };
        }
A
Alex Crichton 已提交
262
        Cow::Owned(res)
263 264
    }

A
Adolfo Ochagavía 已提交
265
    /// Decode a UTF-16 encoded vector `v` into a `String`, returning `Err`
A
Adolfo Ochagavía 已提交
266 267
    /// if `v` contains any invalid data.
    ///
268
    /// # Examples
A
Adolfo Ochagavía 已提交
269 270
    ///
    /// ```rust
A
Adolfo Ochagavía 已提交
271
    /// // 𝄞music
N
Nick Cameron 已提交
272 273
    /// let mut v = &mut [0xD834, 0xDD1E, 0x006d, 0x0075,
    ///                   0x0073, 0x0069, 0x0063];
274 275
    /// assert_eq!(String::from_utf16(v).unwrap(),
    ///            "𝄞music".to_string());
A
Adolfo Ochagavía 已提交
276
    ///
A
Adolfo Ochagavía 已提交
277
    /// // 𝄞mu<invalid>ic
A
Adolfo Ochagavía 已提交
278
    /// v[4] = 0xD800;
279
    /// assert!(String::from_utf16(v).is_err());
A
Adolfo Ochagavía 已提交
280
    /// ```
281 282
    #[stable]
    pub fn from_utf16(v: &[u16]) -> Result<String, FromUtf16Error> {
283
        let mut s = String::with_capacity(v.len());
A
Alex Crichton 已提交
284
        for c in unicode_str::utf16_items(v) {
A
Adolfo Ochagavía 已提交
285
            match c {
A
Alex Crichton 已提交
286
                Utf16Item::ScalarValue(c) => s.push(c),
287
                Utf16Item::LoneSurrogate(_) => return Err(FromUtf16Error(())),
A
Adolfo Ochagavía 已提交
288 289
            }
        }
290
        Ok(s)
A
Adolfo Ochagavía 已提交
291
    }
292

293 294 295
    /// Decode a UTF-16 encoded vector `v` into a string, replacing
    /// invalid data with the replacement character (U+FFFD).
    ///
296 297
    /// # Examples
    ///
298
    /// ```rust
A
Adolfo Ochagavía 已提交
299
    /// // 𝄞mus<invalid>ic<invalid>
N
Nick Cameron 已提交
300 301 302
    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
    ///           0x0073, 0xDD1E, 0x0069, 0x0063,
    ///           0xD834];
303 304
    ///
    /// assert_eq!(String::from_utf16_lossy(v),
A
Alex Crichton 已提交
305
    ///            "𝄞mus\u{FFFD}ic\u{FFFD}".to_string());
306
    /// ```
A
Alex Crichton 已提交
307
    #[stable]
308
    pub fn from_utf16_lossy(v: &[u16]) -> String {
A
Alex Crichton 已提交
309
        unicode_str::utf16_items(v).map(|c| c.to_char_lossy()).collect()
310
    }
A
Adolfo Ochagavía 已提交
311

P
P1start 已提交
312
    /// Convert a vector of `char`s to a `String`.
A
Adolfo Ochagavía 已提交
313
    ///
314
    /// # Examples
A
Adolfo Ochagavía 已提交
315 316
    ///
    /// ```rust
317
    /// # #![allow(deprecated)]
N
Nick Cameron 已提交
318
    /// let chars = &['h', 'e', 'l', 'l', 'o'];
J
Jonas Hietala 已提交
319 320
    /// let s = String::from_chars(chars);
    /// assert_eq!(s.as_slice(), "hello");
A
Adolfo Ochagavía 已提交
321 322
    /// ```
    #[inline]
323
    #[deprecated = "use .collect() instead"]
A
Adolfo Ochagavía 已提交
324 325 326
    pub fn from_chars(chs: &[char]) -> String {
        // Same steps that collecting into a `String` performs: start empty and
        // extend with the characters copied out of the slice.
        let mut out = String::new();
        out.extend(chs.iter().map(|&c| c));
        out
    }
327

328 329 330 331 332 333
    /// Creates a new `String` from a length, capacity, and pointer.
    ///
    /// This is unsafe because:
    /// * We call `Vec::from_raw_parts` to get a `Vec<u8>`;
    /// * We assume that the `Vec` contains valid UTF-8.
    #[inline]
334
    #[stable]
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
    pub unsafe fn from_raw_parts(buf: *mut u8, length: uint, capacity: uint) -> String {
        // Rebuild the backing vector from its raw pieces, then wrap it. No
        // UTF-8 validation happens here; that is the caller's obligation.
        let bytes = Vec::from_raw_parts(buf, length, capacity);
        String { vec: bytes }
    }

    /// Builds a `String` by copying a NUL-terminated `*const u8` buffer.
    ///
    /// This function is unsafe for two reasons: memory is read until a NUL
    /// byte is found, and nothing guarantees that one is present; and the
    /// bytes are not checked to be valid UTF-8.
    #[unstable = "just renamed from `mod raw`"]
    pub unsafe fn from_raw_buf(buf: *const u8) -> String {
        // Reinterpret the pointer as a C string pointer, view it as a `str`,
        // then copy that view into an owned `String`.
        let c_ptr = buf as *const i8;
        let borrowed = str::from_c_str(c_ptr);
        String::from_str(borrowed)
    }

    /// Builds a `String` by copying `len` bytes starting at `buf`.
    ///
    /// The bytes are copied into a new allocation owned by the returned
    /// `String`.
    ///
    /// This function is unsafe because it trusts that `buf` is valid for
    /// `len` bytes of memory, and because it does not validate that those
    /// bytes are UTF-8 encoded data.
    #[unstable = "just renamed from `mod raw`"]
    pub unsafe fn from_raw_buf_len(buf: *const u8, len: uint) -> String {
        // Copy first, then wrap the copy without validation.
        let copied = Vec::from_raw_buf(buf, len);
        String::from_utf8_unchecked(copied)
    }

    /// Converts a vector of bytes to a new `String` without checking if
    /// it contains valid UTF-8. This is unsafe because it assumes that
    /// the UTF-8-ness of the vector has already been validated.
    #[inline]
369
    #[stable]
370 371 372 373
    pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {
        // Takes ownership of `bytes` as the backing buffer without inspecting
        // it; the caller vouches that it is valid UTF-8.
        String { vec: bytes }
    }

374
    /// Return the underlying byte buffer, encoded as UTF-8.
J
Jonas Hietala 已提交
375
    ///
376
    /// # Examples
J
Jonas Hietala 已提交
377 378 379 380 381 382
    ///
    /// ```
    /// let s = String::from_str("hello");
    /// let bytes = s.into_bytes();
    /// assert_eq!(bytes, vec![104, 101, 108, 108, 111]);
    /// ```
383
    #[inline]
A
Alex Crichton 已提交
384
    #[stable]
385 386 387 388
    pub fn into_bytes(self) -> Vec<u8> {
        // Take the string apart and hand back its backing vector; no copy.
        let String { vec } = self;
        vec
    }

389
    /// Creates a string buffer by repeating a character `length` times.
J
Jonas Hietala 已提交
390
    ///
391
    /// # Examples
J
Jonas Hietala 已提交
392 393
    ///
    /// ```
394
    /// # #![allow(deprecated)]
J
Jonas Hietala 已提交
395 396 397
    /// let s = String::from_char(5, 'a');
    /// assert_eq!(s.as_slice(), "aaaaa");
    /// ```
398
    #[inline]
399
    #[deprecated = "use repeat(ch).take(length).collect() instead"]
400
    pub fn from_char(length: uint, ch: char) -> String {
401
        if length == 0 {
402
            return String::new()
403 404
        }

405
        let mut buf = String::new();
406
        buf.push(ch);
407 408
        let size = buf.len() * (length - 1);
        buf.reserve_exact(size);
409
        for _ in range(1, length) {
410
            buf.push(ch)
411 412 413 414 415
        }
        buf
    }

    /// Pushes the given string onto this string buffer.
J
Jonas Hietala 已提交
416
    ///
417
    /// # Examples
J
Jonas Hietala 已提交
418 419 420 421 422 423
    ///
    /// ```
    /// let mut s = String::from_str("foo");
    /// s.push_str("bar");
    /// assert_eq!(s.as_slice(), "foobar");
    /// ```
424
    #[inline]
425
    #[stable]
426 427 428 429
    pub fn push_str(&mut self, string: &str) {
        // Append the bytes of `string` to the backing vector.
        let bytes = string.as_bytes();
        self.vec.push_all(bytes)
    }

P
P1start 已提交
430
    /// Pushes `ch` onto the given string `count` times.
J
Jonas Hietala 已提交
431
    ///
432
    /// # Examples
J
Jonas Hietala 已提交
433 434
    ///
    /// ```
435
    /// # #![allow(deprecated)]
J
Jonas Hietala 已提交
436 437 438 439
    /// let mut s = String::from_str("foo");
    /// s.grow(5, 'Z');
    /// assert_eq!(s.as_slice(), "fooZZZZZ");
    /// ```
440
    #[inline]
441
    #[deprecated = "deprecated in favor of .extend(repeat(ch).take(count))"]
442 443
    pub fn grow(&mut self, count: uint, ch: char) {
        // Append `ch` once per iteration, `count` times in total.
        for _ in range(0, count) {
            self.push(ch)
        }
    }

448 449
    /// Returns the number of bytes that this string buffer can hold without
    /// reallocating.
A
Alex Crichton 已提交
450
    ///
451
    /// # Examples
A
Alex Crichton 已提交
452 453 454
    ///
    /// ```
    /// let s = String::with_capacity(10);
455
    /// assert!(s.capacity() >= 10);
A
Alex Crichton 已提交
456 457
    /// ```
    #[inline]
458
    #[stable]
A
Alex Crichton 已提交
459 460 461 462
    pub fn capacity(&self) -> uint {
        // The string's capacity is the capacity of its backing byte vector.
        self.vec.capacity()
    }

463 464
    /// Deprecated: Renamed to `reserve`.
    #[deprecated = "Renamed to `reserve`"]
465
    pub fn reserve_additional(&mut self, extra: uint) {
        // Kept for compatibility; performs the same call as `reserve`.
        self.vec.reserve(extra)
    }

469 470 471
    /// Reserves capacity for at least `additional` more bytes to be inserted
    /// in the given `String`. The collection may reserve more space to avoid
    /// frequent reallocations.
472 473 474 475
    ///
    /// # Panics
    ///
    /// Panics if the new capacity overflows `uint`.
J
Jonas Hietala 已提交
476
    ///
477
    /// # Examples
J
Jonas Hietala 已提交
478 479 480 481
    ///
    /// ```
    /// let mut s = String::new();
    /// s.reserve(10);
482
    /// assert!(s.capacity() >= 10);
J
Jonas Hietala 已提交
483
    /// ```
484
    #[inline]
485
    #[stable]
486 487
    pub fn reserve(&mut self, additional: uint) {
        // Forwarded to the backing vector; `additional` is a byte count.
        self.vec.reserve(additional)
    }

490 491 492
    /// Reserves the minimum capacity for exactly `additional` more bytes to be
    /// inserted in the given `String`. Does nothing if the capacity is already
    /// sufficient.
493
    ///
494 495 496
    /// Note that the allocator may give the collection more space than it
    /// requests. Therefore capacity can not be relied upon to be precisely
    /// minimal. Prefer `reserve` if future insertions are expected.
497 498 499 500
    ///
    /// # Panics
    ///
    /// Panics if the new capacity overflows `uint`.
J
Jonas Hietala 已提交
501
    ///
502
    /// # Examples
J
Jonas Hietala 已提交
503 504 505
    ///
    /// ```
    /// let mut s = String::new();
506 507
    /// s.reserve_exact(10);
    /// assert!(s.capacity() >= 10);
J
Jonas Hietala 已提交
508
    /// ```
509
    #[inline]
510
    #[stable]
511 512
    pub fn reserve_exact(&mut self, additional: uint) {
        // Forwarded to the backing vector; `additional` is a byte count.
        self.vec.reserve_exact(additional)
    }

    /// Shrinks the capacity of this string buffer to match its length.
J
Jonas Hietala 已提交
516
    ///
517
    /// # Examples
J
Jonas Hietala 已提交
518 519 520 521
    ///
    /// ```
    /// let mut s = String::from_str("foo");
    /// s.reserve(100);
522
    /// assert!(s.capacity() >= 100);
J
Jonas Hietala 已提交
523
    /// s.shrink_to_fit();
524
    /// assert_eq!(s.capacity(), 3);
J
Jonas Hietala 已提交
525
    /// ```
526
    #[inline]
527
    #[stable]
528 529 530 531 532
    pub fn shrink_to_fit(&mut self) {
        // Forwarded to the backing byte vector.
        self.vec.shrink_to_fit()
    }

    /// Adds the given character to the end of the string.
J
Jonas Hietala 已提交
533
    ///
534
    /// # Examples
J
Jonas Hietala 已提交
535 536 537
    ///
    /// ```
    /// let mut s = String::from_str("abc");
A
Alex Crichton 已提交
538 539 540
    /// s.push('1');
    /// s.push('2');
    /// s.push('3');
J
Jonas Hietala 已提交
541 542
    /// assert_eq!(s.as_slice(), "abc123");
    /// ```
543
    #[inline]
544
    #[stable]
A
Alex Crichton 已提交
545
    pub fn push(&mut self, ch: char) {
546 547 548 549 550
        if (ch as u32) < 0x80 {
            self.vec.push(ch as u8);
            return;
        }

551
        let cur_len = self.len();
552
        // This may use up to 4 bytes.
553
        self.vec.reserve(4);
554

555
        unsafe {
556 557
            // Attempt to not use an intermediate buffer by just pushing bytes
            // directly onto this string.
558
            let slice = RawSlice {
559 560 561
                data: self.vec.as_ptr().offset(cur_len as int),
                len: 4,
            };
562
            let used = ch.encode_utf8(mem::transmute(slice)).unwrap_or(0);
563 564 565 566 567
            self.vec.set_len(cur_len + used);
        }
    }

    /// Works with the underlying buffer as a byte slice.
J
Jonas Hietala 已提交
568
    ///
569
    /// # Examples
J
Jonas Hietala 已提交
570 571 572
    ///
    /// ```
    /// let s = String::from_str("hello");
N
Nick Cameron 已提交
573 574
    /// let b: &[_] = &[104, 101, 108, 108, 111];
    /// assert_eq!(s.as_bytes(), b);
J
Jonas Hietala 已提交
575
    /// ```
576
    #[inline]
A
Alex Crichton 已提交
577
    #[stable]
578 579 580 581
    pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
        // Borrow the backing vector as a slice; nothing is copied.
        self.vec.as_slice()
    }

P
P1start 已提交
582
    /// Shortens a string to the specified length.
J
Jonas Hietala 已提交
583
    ///
584
    /// # Panics
J
Jonas Hietala 已提交
585
    ///
586
    /// Panics if `new_len` > current length,
587
    /// or if `new_len` is not a character boundary.
J
Jonas Hietala 已提交
588
    ///
589
    /// # Examples
J
Jonas Hietala 已提交
590 591 592 593 594 595
    ///
    /// ```
    /// let mut s = String::from_str("hello");
    /// s.truncate(2);
    /// assert_eq!(s.as_slice(), "he");
    /// ```
596
    #[inline]
597
    #[stable]
598
    pub fn truncate(&mut self, new_len: uint) {
        // Refuse to cut in the middle of a character, which would leave
        // invalid UTF-8 behind.
        assert!(self.is_char_boundary(new_len));
        self.vec.truncate(new_len)
    }

J
Jonas Hietala 已提交
603 604 605
    /// Removes the last character from the string buffer and returns it.
    /// Returns `None` if this string buffer is empty.
    ///
606
    /// # Examples
J
Jonas Hietala 已提交
607 608 609
    ///
    /// ```
    /// let mut s = String::from_str("foo");
A
Alex Crichton 已提交
610 611 612 613
    /// assert_eq!(s.pop(), Some('o'));
    /// assert_eq!(s.pop(), Some('o'));
    /// assert_eq!(s.pop(), Some('f'));
    /// assert_eq!(s.pop(), None);
J
Jonas Hietala 已提交
614
    /// ```
615
    #[inline]
616
    #[stable]
A
Alex Crichton 已提交
617
    pub fn pop(&mut self) -> Option<char> {
618 619 620 621 622
        let len = self.len();
        if len == 0 {
            return None
        }

623
        let CharRange {ch, next} = self.char_range_at_reverse(len);
624 625 626 627 628 629
        unsafe {
            self.vec.set_len(next);
        }
        Some(ch)
    }

630
    /// Removes the character from the string buffer at byte position `idx` and
631
    /// returns it.
632 633 634
    ///
    /// # Warning
    ///
635
    /// This is an O(n) operation as it requires copying every element in the
636 637
    /// buffer.
    ///
S
Steve Klabnik 已提交
638
    /// # Panics
639
    ///
640 641
    /// If `idx` does not lie on a character boundary, or if it is out of
    /// bounds, then this function will panic.
J
Jonas Hietala 已提交
642
    ///
643
    /// # Examples
J
Jonas Hietala 已提交
644 645 646
    ///
    /// ```
    /// let mut s = String::from_str("foo");
647 648 649
    /// assert_eq!(s.remove(0), 'f');
    /// assert_eq!(s.remove(1), 'o');
    /// assert_eq!(s.remove(0), 'o');
J
Jonas Hietala 已提交
650
    /// ```
651 652
    #[stable]
    pub fn remove(&mut self, idx: uint) -> char {
653
        let len = self.len();
654
        assert!(idx <= len);
655

656
        let CharRange { ch, next } = self.char_range_at(idx);
657
        unsafe {
658 659 660 661
            ptr::copy_memory(self.vec.as_mut_ptr().offset(idx as int),
                             self.vec.as_ptr().offset(next as int),
                             len - next);
            self.vec.set_len(len - (next - idx));
662
        }
663
        ch
664
    }
665

666 667 668 669
    /// Insert a character into the string buffer at byte position `idx`.
    ///
    /// # Warning
    ///
670
    /// This is an O(n) operation as it requires copying every element in the
671 672
    /// buffer.
    ///
S
Steve Klabnik 已提交
673
    /// # Panics
674 675
    ///
    /// If `idx` does not lie on a character boundary or is out of bounds, then
S
Steve Klabnik 已提交
676
    /// this function will panic.
677
    #[stable]
678 679 680
    pub fn insert(&mut self, idx: uint, ch: char) {
        let len = self.len();
        assert!(idx <= len);
681
        assert!(self.is_char_boundary(idx));
682
        self.vec.reserve(4);
683
        let mut bits = [0; 4];
N
Nick Cameron 已提交
684
        let amt = ch.encode_utf8(&mut bits).unwrap();
685 686 687 688 689 690 691 692 693 694 695 696

        unsafe {
            ptr::copy_memory(self.vec.as_mut_ptr().offset((idx + amt) as int),
                             self.vec.as_ptr().offset(idx as int),
                             len - idx);
            ptr::copy_memory(self.vec.as_mut_ptr().offset(idx as int),
                             bits.as_ptr(),
                             amt);
            self.vec.set_len(len + amt);
        }
    }

697 698
    /// Views the string buffer as a mutable sequence of bytes.
    ///
J
Jonas Hietala 已提交
699 700 701
    /// This is unsafe because it does not check
    /// to ensure that the resulting string will be valid UTF-8.
    ///
702
    /// # Examples
J
Jonas Hietala 已提交
703 704 705 706 707 708 709 710 711 712
    ///
    /// ```
    /// let mut s = String::from_str("hello");
    /// unsafe {
    ///     let vec = s.as_mut_vec();
    ///     assert!(vec == &mut vec![104, 101, 108, 108, 111]);
    ///     vec.reverse();
    /// }
    /// assert_eq!(s.as_slice(), "olleh");
    /// ```
713
    #[stable]
714 715 716
    pub unsafe fn as_mut_vec<'a>(&'a mut self) -> &'a mut Vec<u8> {
        // Hands out the backing vector itself; edits made through the
        // returned reference are not checked for UTF-8 validity.
        &mut self.vec
    }
717

718 719
    /// Return the number of bytes in this string.
    ///
720
    /// # Examples
721 722 723 724 725
    ///
    /// ```
    /// let a = "foo".to_string();
    /// assert_eq!(a.len(), 3);
    /// ```
726
    #[inline]
A
Alex Crichton 已提交
727
    #[stable]
728
    // The length is that of the backing byte vector, so it counts bytes.
    pub fn len(&self) -> uint { self.vec.len() }
729

730 731
    /// Returns `true` if the string contains no bytes.
    ///
732
    /// # Examples
733 734 735 736 737 738 739
    ///
    /// ```
    /// let mut v = String::new();
    /// assert!(v.is_empty());
    /// v.push('a');
    /// assert!(!v.is_empty());
    /// ```
740
    #[stable]
741 742 743 744
    pub fn is_empty(&self) -> bool {
        // Empty means the backing byte vector has length zero.
        self.vec.len() == 0
    }

    /// Truncates the string, returning it to 0 length.
    ///
745
    /// # Examples
746 747 748 749 750 751
    ///
    /// ```
    /// let mut s = "foo".to_string();
    /// s.clear();
    /// assert!(s.is_empty());
    /// ```
752
    #[inline]
A
Alex Crichton 已提交
753
    #[stable]
754
    pub fn clear(&mut self) {
755 756 757 758
        self.vec.clear()
    }
}

759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781
impl FromUtf8Error {
    /// Consume this error, returning the bytes that were attempted to make a
    /// `String` with.
    #[stable]
    pub fn into_bytes(self) -> Vec<u8> { self.bytes }

    /// Access the underlying UTF8-error that was the cause of this error.
    #[stable]
    pub fn utf8_error(&self) -> Utf8Error { self.error }
}

impl fmt::Show for FromUtf8Error {
    // Shown exactly like the wrapped `Utf8Error`; the rejected bytes are not
    // printed.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let cause = &self.error;
        cause.fmt(f)
    }
}

impl fmt::Show for FromUtf16Error {
    // The error carries no data, so the output is a fixed description.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let description = "invalid utf-16: lone surrogate found";
        description.fmt(f)
    }
}

A
Alex Crichton 已提交
782
#[experimental = "waiting on FromIterator stabilization"]
783 784 785
impl FromIterator<char> for String {
    fn from_iter<I:Iterator<char>>(iterator: I) -> String {
        let mut buf = String::new();
786 787 788 789 790
        buf.extend(iterator);
        buf
    }
}

791 792 793 794 795 796 797 798 799
#[experimental = "waiting on FromIterator stabilization"]
impl<'a> FromIterator<&'a str> for String {
    // Concatenates the yielded string slices, in order, into a new `String`.
    fn from_iter<I:Iterator<&'a str>>(iterator: I) -> String {
        let mut joined = String::new();
        joined.extend(iterator);
        joined
    }
}

G
gamazeps 已提交
800 801
#[experimental = "waiting on Extend stabilization"]
impl Extend<char> for String {
802
    fn extend<I:Iterator<char>>(&mut self, mut iterator: I) {
803 804
        let (lower_bound, _) = iterator.size_hint();
        self.reserve(lower_bound);
805
        for ch in iterator {
806
            self.push(ch)
807 808 809 810
        }
    }
}

811 812 813 814 815 816 817 818 819 820 821 822
#[experimental = "waiting on Extend stabilization"]
impl<'a> Extend<&'a str> for String {
    fn extend<I: Iterator<&'a str>>(&mut self, mut iterator: I) {
        // Assume every yielded slice contributes at least one byte and
        // reserve that much up front.
        let (min_pieces, _) = iterator.size_hint();
        self.reserve(min_pieces);
        for piece in iterator {
            self.push_str(piece)
        }
    }
}

A
Aaron Turon 已提交
823
#[stable]
J
Jorge Aparicio 已提交
824 825 826 827 828 829 830 831 832
// Equality between two `String`s: both sides are dereferenced twice
// (`&String` -> `String` -> `str`) and the comparison is done on the `str`s.
impl PartialEq for String {
    #[inline]
    fn eq(&self, other: &String) -> bool { PartialEq::eq(&**self, &**other) }
    #[inline]
    fn ne(&self, other: &String) -> bool { PartialEq::ne(&**self, &**other) }
}

// Generates `PartialEq` in both directions between `$lhs` and `$rhs`. Each
// generated method dereferences both operands twice and forwards to the
// comparison on the resulting values.
macro_rules! impl_eq {
    ($lhs:ty, $rhs: ty) => {
        #[stable]
        impl<'a> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool { PartialEq::eq(&**self, &**other) }
            #[inline]
            fn ne(&self, other: &$rhs) -> bool { PartialEq::ne(&**self, &**other) }
        }

        #[stable]
        impl<'a> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool { PartialEq::eq(&**self, &**other) }
            #[inline]
            fn ne(&self, other: &$lhs) -> bool { PartialEq::ne(&**self, &**other) }
        }

    }
}

852 853
// Symmetric comparisons: `String` with `&str`, and `CowString` with `String`.
impl_eq! { String, &'a str }
impl_eq! { CowString<'a>, String }
J
Jorge Aparicio 已提交
854

A
Aaron Turon 已提交
855
#[stable]
J
Jorge Aparicio 已提交
856 857 858 859 860 861 862
// `CowString == &str`. Written by hand with its own lifetime `'b` for the
// `&str` side, independent of the `CowString` lifetime `'a`.
impl<'a, 'b> PartialEq<&'b str> for CowString<'a> {
    #[inline]
    fn eq(&self, other: &&'b str) -> bool { PartialEq::eq(&**self, &**other) }
    #[inline]
    fn ne(&self, other: &&'b str) -> bool { PartialEq::ne(&**self, &**other) }
}

A
Aaron Turon 已提交
863
#[stable]
J
Jorge Aparicio 已提交
864 865 866 867 868 869 870
// `&str == CowString`, the mirror image of the `CowString == &str` impl.
impl<'a, 'b> PartialEq<CowString<'a>> for &'b str {
    #[inline]
    fn eq(&self, other: &CowString<'a>) -> bool { PartialEq::eq(&**self, &**other) }
    #[inline]
    fn ne(&self, other: &CowString<'a>) -> bool { PartialEq::ne(&**self, &**other) }
}

A
Alex Crichton 已提交
871
#[experimental = "waiting on Str stabilization"]
A
Alex Crichton 已提交
872
#[allow(deprecated)]
873
impl Str for String {
874
    #[inline]
A
Alex Crichton 已提交
875
    #[stable]
876
    fn as_slice<'a>(&'a self) -> &'a str {
A
Alex Crichton 已提交
877
        unsafe { mem::transmute(self.vec.as_slice()) }
878
    }
879 880
}

A
Alex Crichton 已提交
881
#[stable]
882
impl Default for String {
883
    #[stable]
884 885
    fn default() -> String {
        String::new()
886 887 888
    }
}

A
Alex Crichton 已提交
889
#[experimental = "waiting on Show stabilization"]
890
impl fmt::Show for String {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Formatting is delegated to the `str` this string dereferences to.
        let text: &str = &**self;
        fmt::Show::fmt(text, f)
    }
}

A
Alex Crichton 已提交
896
#[experimental = "waiting on Hash stabilization"]
897
impl<H: hash::Writer> hash::Hash<H> for String {
    #[inline]
    fn hash(&self, hasher: &mut H) {
        // Hash exactly as the dereferenced `str` does.
        let text: &str = &**self;
        hash::Hash::hash(text, hasher)
    }
}

J
Jorge Aparicio 已提交
904 905
#[allow(deprecated)]
#[deprecated = "Use overloaded `core::cmp::PartialEq`"]
906
impl<'a, S: Str> Equiv<S> for String {
    #[inline]
    fn equiv(&self, other: &S) -> bool {
        // Compare the two values through their string-slice views.
        let mine: &str = self.as_slice();
        let theirs: &str = other.as_slice();
        mine == theirs
    }
}

A
Alex Crichton 已提交
913
#[experimental = "waiting on Add stabilization"]
J
Jorge Aparicio 已提交
914 915 916 917 918 919 920
impl<'a> Add<&'a str, String> for String {
    fn add(self, other: &str) -> String {
        // The left operand is taken by value; the right-hand slice is appended
        // to it and the same string is returned.
        let mut joined = self;
        joined.push_str(other);
        joined
    }
}

N
Nick Cameron 已提交
921 922 923
impl ops::Slice<uint, str> for String {
    #[inline]
    fn as_slice_<'a>(&'a self) -> &'a str {
A
Alex Crichton 已提交
924
        unsafe { mem::transmute(self.vec.as_slice()) }
N
Nick Cameron 已提交
925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941
    }

    #[inline]
    fn slice_from_or_fail<'a>(&'a self, from: &uint) -> &'a str {
        self[][*from..]
    }

    #[inline]
    fn slice_to_or_fail<'a>(&'a self, to: &uint) -> &'a str {
        self[][..*to]
    }

    #[inline]
    fn slice_or_fail<'a>(&'a self, from: &uint, to: &uint) -> &'a str {
        self[][*from..*to]
    }
}
942

943
#[experimental = "waiting on Deref stabilization"]
944 945 946
impl ops::Deref for String {
    type Target = str;

A
Alex Crichton 已提交
947 948 949
    fn deref<'a>(&'a self) -> &'a str {
        unsafe { mem::transmute(self.vec[]) }
    }
950 951
}

952 953 954 955 956 957
/// Wrapper type providing a `&String` reference via `Deref`.
///
/// Values of this type are produced by `as_string`, which builds one from a
/// borrowed string slice.
#[experimental]
pub struct DerefString<'a> {
    // Byte-vector wrapper obtained from `as_vec`; dereferenced and
    // reinterpreted as a `String` by the `Deref` implementation.
    x: DerefVec<'a, u8>
}

958 959 960
impl<'a> Deref for DerefString<'a> {
    type Target = String;

961 962 963 964 965 966
    fn deref<'b>(&'b self) -> &'b String {
        unsafe { mem::transmute(&*self.x) }
    }
}

/// Convert a string slice to a wrapper type providing a `&String` reference.
967 968 969 970 971 972 973 974 975 976 977 978 979
///
/// # Examples
///
/// ```
/// use std::string::as_string;
///
/// fn string_consumer(s: String) {
///     assert_eq!(s, "foo".to_string());
/// }
///
/// let string = as_string("foo").clone();
/// string_consumer(string);
/// ```
980 981 982 983 984
#[experimental]
pub fn as_string<'a>(x: &'a str) -> DerefString<'a> {
    // Wrap the slice's bytes with `as_vec`, then hand them to the wrapper.
    let bytes = as_vec(x.as_bytes());
    DerefString { x: bytes }
}

B
Brendan Zabarauskas 已提交
985 986 987 988 989 990 991
impl FromStr for String {
    // Every `&str` is accepted, so the result is always `Some`.
    #[inline]
    fn from_str(s: &str) -> Option<String> {
        let owned = String::from_str(s);
        Some(owned)
    }
}

992
/// Trait for converting a type to a string, consuming it in the process.
993
#[deprecated = "trait will be removed"]
994 995 996 997 998
pub trait IntoString {
    /// Consume and convert to a string.
    ///
    /// `self` is taken by value, so the original value is no longer usable
    /// after the call.
    fn into_string(self) -> String;
}

999 1000 1001 1002 1003 1004 1005
/// A generic trait for converting a value to a string.
///
/// A blanket implementation in this module covers every type that implements
/// `fmt::Show`.
pub trait ToString {
    /// Converts the value of `self` to an owned string.
    fn to_string(&self) -> String;
}

impl<T: fmt::Show> ToString for T {
1006
    fn to_string(&self) -> String {
1007 1008 1009 1010 1011
        use core::fmt::Writer;
        let mut buf = String::new();
        let _ = buf.write_fmt(format_args!("{}", self));
        buf.shrink_to_fit();
        buf
1012
    }
1013 1014
}

1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026
// An owned `String` becomes the `Owned` variant. No borrow is involved, so the
// resulting cow carries the `'static` lifetime.
impl IntoCow<'static, String, str> for String {
    fn into_cow(self) -> CowString<'static> {
        Cow::Owned(self)
    }
}

// A borrowed slice becomes the `Borrowed` variant and keeps its lifetime `'a`;
// the slice itself is stored as-is.
impl<'a> IntoCow<'a, String, str> for &'a str {
    fn into_cow(self) -> CowString<'a> {
        Cow::Borrowed(self)
    }
}

J
Jonas Hietala 已提交
1027
/// Unsafe operations
1028
#[deprecated]
1029 1030 1031 1032
pub mod raw {
    use super::String;
    use vec::Vec;

P
P1start 已提交
1033
    /// Creates a new `String` from a length, capacity, and pointer.
1034 1035
    ///
    /// This is unsafe because:
P
P1start 已提交
1036 1037
    /// * We call `Vec::from_raw_parts` to get a `Vec<u8>`;
    /// * We assume that the `Vec` contains valid UTF-8.
1038
    #[inline]
1039
    #[deprecated = "renamed to String::from_raw_parts"]
A
Adolfo Ochagavía 已提交
1040
    pub unsafe fn from_parts(buf: *mut u8, length: uint, capacity: uint) -> String {
1041
        String::from_raw_parts(buf, length, capacity)
1042 1043
    }

P
P1start 已提交
1044
    /// Creates a `String` from a `*const u8` buffer of the given length.
1045 1046
    ///
    /// This function is unsafe for two reasons:
1047
    ///
P
P1start 已提交
1048 1049
    /// * A raw pointer is dereferenced and transmuted to `&[u8]`;
    /// * The slice is not checked to see whether it contains valid UTF-8.
1050
    #[deprecated = "renamed to String::from_raw_buf_len"]
1051
    pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> String {
1052
        String::from_raw_buf_len(buf, len)
1053
    }
A
Adolfo Ochagavía 已提交
1054

P
P1start 已提交
1055
    /// Creates a `String` from a null-terminated `*const u8` buffer.
A
Adolfo Ochagavía 已提交
1056 1057
    ///
    /// This function is unsafe because we dereference memory until we find the NUL character,
J
Joseph Crail 已提交
1058
    /// which is not guaranteed to be present. Additionally, the slice is not checked to see
A
Adolfo Ochagavía 已提交
1059
    /// whether it contains valid UTF-8.
1060
    #[deprecated = "renamed to String::from_raw_buf"]
A
Adolfo Ochagavía 已提交
1061
    pub unsafe fn from_buf(buf: *const u8) -> String {
1062
        String::from_raw_buf(buf)
A
Adolfo Ochagavía 已提交
1063 1064 1065 1066
    }

    /// Converts a vector of bytes to a new `String` without checking if
    /// it contains valid UTF-8. This is unsafe because it assumes that
P
P1start 已提交
1067
    /// the UTF-8-ness of the vector has already been validated.
A
Adolfo Ochagavía 已提交
1068
    #[inline]
1069
    #[deprecated = "renamed to String::from_utf8_unchecked"]
A
Adolfo Ochagavía 已提交
1070
    pub unsafe fn from_utf8(bytes: Vec<u8>) -> String {
1071
        String::from_utf8_unchecked(bytes)
A
Adolfo Ochagavía 已提交
1072
    }
1073 1074
}

A
Alex Crichton 已提交
1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086
/// A clone-on-write string.
///
/// This is `Cow` instantiated with `String` as the owned form and `str` as the
/// borrowed form.
#[stable]
pub type CowString<'a> = Cow<'a, String, str>;

#[allow(deprecated)]
impl<'a> Str for CowString<'a> {
    #[inline]
    fn as_slice<'b>(&'b self) -> &'b str {
        (**self).as_slice()
    }
}

1087 1088 1089 1090 1091 1092 1093
// Lets a `String` act as a formatting sink: written text is appended to the
// string via `push_str`.
impl fmt::Writer for String {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.push_str(s);
        // There is no failure path here; success is always reported.
        Ok(())
    }
}

1094 1095
#[cfg(test)]
mod tests {
1096
    use prelude::*;
1097 1098
    use test::Bencher;

A
Alex Crichton 已提交
1099
    use str::Utf8Error;
1100
    use str;
1101
    use super::as_string;
1102

1103 1104 1105 1106 1107 1108
    #[test]
    fn test_as_string() {
        // The wrapper must expose the same text it was built from.
        let literal = "foo";
        let wrapped = as_string(literal);
        assert_eq!(literal, wrapped.as_slice());
    }

1109 1110 1111 1112 1113
    #[test]
    fn test_from_str() {
        // Parsing a string slice into a `String` yields `Some` with equal text.
        let parsed: Option<::std::string::String> = from_str("string");
        let borrowed = parsed.as_ref().map(|owned| owned.as_slice());
        assert_eq!(borrowed, Some("string"));
    }
1114 1115 1116

    #[test]
    fn test_from_utf8() {
N
NODA, Kai 已提交
1117
        let xs = b"hello".to_vec();
1118 1119
        assert_eq!(String::from_utf8(xs).unwrap(),
                   String::from_str("hello"));
1120

N
NODA, Kai 已提交
1121
        let xs = "ศไทย中华Việt Nam".as_bytes().to_vec();
1122 1123
        assert_eq!(String::from_utf8(xs).unwrap(),
                   String::from_str("ศไทย中华Việt Nam"));
1124

N
NODA, Kai 已提交
1125
        let xs = b"hello\xFF".to_vec();
1126 1127 1128
        let err = String::from_utf8(xs).err().unwrap();
        assert_eq!(err.utf8_error(), Utf8Error::TooShort);
        assert_eq!(err.into_bytes(), b"hello\xff".to_vec());
1129 1130 1131 1132 1133
    }

    #[test]
    fn test_from_utf8_lossy() {
        let xs = b"hello";
J
Jorge Aparicio 已提交
1134 1135
        let ys: str::CowString = "hello".into_cow();
        assert_eq!(String::from_utf8_lossy(xs), ys);
1136

A
Adolfo Ochagavía 已提交
1137
        let xs = "ศไทย中华Việt Nam".as_bytes();
J
Jorge Aparicio 已提交
1138 1139
        let ys: str::CowString = "ศไทย中华Việt Nam".into_cow();
        assert_eq!(String::from_utf8_lossy(xs), ys);
1140 1141

        let xs = b"Hello\xC2 There\xFF Goodbye";
A
Adolfo Ochagavía 已提交
1142
        assert_eq!(String::from_utf8_lossy(xs),
A
Alex Crichton 已提交
1143
                   String::from_str("Hello\u{FFFD} There\u{FFFD} Goodbye").into_cow());
1144 1145 1146

        let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
        assert_eq!(String::from_utf8_lossy(xs),
A
Alex Crichton 已提交
1147
                   String::from_str("Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye").into_cow());
1148 1149

        let xs = b"\xF5foo\xF5\x80bar";
A
Adolfo Ochagavía 已提交
1150
        assert_eq!(String::from_utf8_lossy(xs),
A
Alex Crichton 已提交
1151
                   String::from_str("\u{FFFD}foo\u{FFFD}\u{FFFD}bar").into_cow());
1152 1153

        let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
A
Adolfo Ochagavía 已提交
1154
        assert_eq!(String::from_utf8_lossy(xs),
A
Alex Crichton 已提交
1155
                   String::from_str("\u{FFFD}foo\u{FFFD}bar\u{FFFD}baz").into_cow());
1156 1157 1158

        let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
        assert_eq!(String::from_utf8_lossy(xs),
A
Alex Crichton 已提交
1159
                   String::from_str("\u{FFFD}foo\u{FFFD}bar\u{FFFD}\u{FFFD}baz").into_cow());
1160 1161

        let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
A
Alex Crichton 已提交
1162 1163
        assert_eq!(String::from_utf8_lossy(xs), String::from_str("\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\
                                               foo\u{10000}bar").into_cow());
1164 1165 1166

        // surrogates
        let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
A
Alex Crichton 已提交
1167 1168
        assert_eq!(String::from_utf8_lossy(xs), String::from_str("\u{FFFD}\u{FFFD}\u{FFFD}foo\
                                               \u{FFFD}\u{FFFD}\u{FFFD}bar").into_cow());
1169 1170
    }

1171 1172 1173
    #[test]
    fn test_from_utf16() {
        let pairs =
A
Adolfo Ochagavía 已提交
1174
            [(String::from_str("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n"),
1175 1176 1177 1178 1179
              vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
                0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
                0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
                0xd800_u16, 0xdf30_u16, 0x000a_u16]),

A
Adolfo Ochagavía 已提交
1180
             (String::from_str("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n"),
1181 1182 1183 1184 1185 1186 1187
              vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
                0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
                0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
                0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
                0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
                0x000a_u16]),

A
Adolfo Ochagavía 已提交
1188
             (String::from_str("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n"),
1189 1190 1191 1192 1193 1194 1195 1196
              vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
                0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
                0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
                0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
                0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
                0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
                0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),

A
Adolfo Ochagavía 已提交
1197
             (String::from_str("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n"),
1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209
              vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
                0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
                0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
                0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
                0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
                0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
                0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
                0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
                0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
                0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
                0x000a_u16 ]),
             // Issue #12318, even-numbered non-BMP planes
A
Alex Crichton 已提交
1210
             (String::from_str("\u{20000}"),
1211 1212 1213 1214
              vec![0xD840, 0xDC00])];

        for p in pairs.iter() {
            let (s, u) = (*p).clone();
1215
            let s_as_utf16 = s.utf16_units().collect::<Vec<u16>>();
1216 1217
            let u_as_string = String::from_utf16(u.as_slice()).unwrap();

A
Alex Crichton 已提交
1218
            assert!(::unicode::str::is_utf16(u.as_slice()));
1219 1220 1221 1222 1223 1224
            assert_eq!(s_as_utf16, u);

            assert_eq!(u_as_string, s);
            assert_eq!(String::from_utf16_lossy(u.as_slice()), s);

            assert_eq!(String::from_utf16(s_as_utf16.as_slice()).unwrap(), s);
1225
            assert_eq!(u_as_string.utf16_units().collect::<Vec<u16>>(), u);
1226 1227 1228 1229 1230 1231 1232
        }
    }

    #[test]
    fn test_utf16_invalid() {
        // completely positive cases tested above.
        // lead + eof
1233
        assert!(String::from_utf16(&[0xD800]).is_err());
1234
        // lead + lead
1235
        assert!(String::from_utf16(&[0xD800, 0xD800]).is_err());
1236 1237

        // isolated trail
1238
        assert!(String::from_utf16(&[0x0061, 0xDC00]).is_err());
1239 1240

        // general
1241
        assert!(String::from_utf16(&[0xD800, 0xd801, 0xdc8b, 0xD800]).is_err());
1242 1243 1244 1245 1246 1247
    }

    #[test]
    fn test_from_utf16_lossy() {
        // completely positive cases tested above.
        // lead + eof
A
Alex Crichton 已提交
1248
        assert_eq!(String::from_utf16_lossy(&[0xD800]), String::from_str("\u{FFFD}"));
1249
        // lead + lead
A
Alex Crichton 已提交
1250 1251
        assert_eq!(String::from_utf16_lossy(&[0xD800, 0xD800]),
                   String::from_str("\u{FFFD}\u{FFFD}"));
1252 1253

        // isolated trail
A
Alex Crichton 已提交
1254
        assert_eq!(String::from_utf16_lossy(&[0x0061, 0xDC00]), String::from_str("a\u{FFFD}"));
1255 1256

        // general
N
Nick Cameron 已提交
1257
        assert_eq!(String::from_utf16_lossy(&[0xD800, 0xd801, 0xdc8b, 0xD800]),
A
Alex Crichton 已提交
1258
                   String::from_str("\u{FFFD}𐒋\u{FFFD}"));
1259
    }
1260

1261 1262 1263 1264 1265 1266 1267 1268
    #[test]
    fn test_from_buf_len() {
        // Only the first three bytes of the buffer are requested.
        let bytes = vec![65u8, 65, 65, 65, 65, 65, 65, 0];
        let prefix = unsafe { super::raw::from_buf_len(bytes.as_ptr(), 3) };
        assert_eq!(prefix, String::from_str("AAA"));
    }

A
Adolfo Ochagavía 已提交
1269 1270 1271 1272 1273 1274 1275 1276 1277 1278
    #[test]
    fn test_from_buf() {
        // Seven 'A' bytes followed by a terminating NUL.
        let bytes = vec![65, 65, 65, 65, 65, 65, 65, 0];
        let text = unsafe { super::raw::from_buf(bytes.as_ptr()) };
        assert_eq!(text, String::from_str("AAAAAAA"));
    }

1279 1280
    #[test]
    fn test_push_bytes() {
1281
        let mut s = String::from_str("ABC");
1282
        unsafe {
N
NODA, Kai 已提交
1283
            let mv = s.as_mut_vec();
N
Nick Cameron 已提交
1284
            mv.push_all(&[b'D']);
1285
        }
1286
        assert_eq!(s, "ABCD");
1287 1288 1289 1290
    }

    #[test]
    fn test_push_str() {
1291
        let mut s = String::new();
1292
        s.push_str("");
1293
        assert_eq!(s.slice_from(0), "");
1294
        s.push_str("abc");
1295
        assert_eq!(s.slice_from(0), "abc");
1296
        s.push_str("ประเทศไทย中华Việt Nam");
1297
        assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
1298 1299 1300
    }

    #[test]
1301
    fn test_push() {
1302
        let mut data = String::from_str("ประเทศไทย中");
1303 1304 1305 1306 1307
        data.push('华');
        data.push('b'); // 1 byte
        data.push('¢'); // 2 byte
        data.push('€'); // 3 byte
        data.push('𤭢'); // 4 byte
1308
        assert_eq!(data, "ประเทศไทย中华b¢€𤭢");
1309 1310
    }

1311
    #[test]
N
NODA, Kai 已提交
1312
    fn test_pop() {
1313
        let mut data = String::from_str("ประเทศไทย中华b¢€𤭢");
N
NODA, Kai 已提交
1314 1315 1316 1317 1318
        assert_eq!(data.pop().unwrap(), '𤭢'); // 4 bytes
        assert_eq!(data.pop().unwrap(), '€'); // 3 bytes
        assert_eq!(data.pop().unwrap(), '¢'); // 2 bytes
        assert_eq!(data.pop().unwrap(), 'b'); // 1 bytes
        assert_eq!(data.pop().unwrap(), '华');
1319
        assert_eq!(data, "ประเทศไทย中");
1320 1321
    }

1322 1323
    #[test]
    fn test_str_truncate() {
1324
        let mut s = String::from_str("12345");
1325
        s.truncate(5);
1326
        assert_eq!(s, "12345");
1327
        s.truncate(3);
1328
        assert_eq!(s, "123");
1329
        s.truncate(0);
1330
        assert_eq!(s, "");
1331

1332
        let mut s = String::from_str("12345");
1333
        let p = s.as_ptr();
1334 1335
        s.truncate(3);
        s.push_str("6");
1336
        let p_ = s.as_ptr();
1337 1338 1339 1340 1341 1342
        assert_eq!(p_, p);
    }

    #[test]
    #[should_fail]
    fn test_str_truncate_invalid_len() {
1343
        let mut s = String::from_str("12345");
1344 1345 1346 1347 1348 1349
        s.truncate(6);
    }

    #[test]
    #[should_fail]
    fn test_str_truncate_split_codepoint() {
A
Alex Crichton 已提交
1350
        let mut s = String::from_str("\u{FC}"); // ü
1351 1352
        s.truncate(1);
    }
1353 1354 1355

    #[test]
    fn test_str_clear() {
1356
        let mut s = String::from_str("12345");
1357 1358
        s.clear();
        assert_eq!(s.len(), 0);
1359
        assert_eq!(s, "");
1360
    }
1361 1362 1363 1364 1365

    #[test]
    fn test_str_add() {
        let a = String::from_str("12345");
        let b = a + "2";
J
Jorge Aparicio 已提交
1366
        let b = b + "2";
1367
        assert_eq!(b.len(), 7);
1368
        assert_eq!(b, "1234522");
1369
    }
1370

1371 1372 1373
    // Removes one multi-byte character from the front and one from the middle,
    // checking the returned character, the byte length, and the remaining text.
    #[test]
    fn remove() {
        let mut s = "ศไทย中华Việt Nam; foobar".to_string();
        // 'ศ' is three bytes long, so the 36-byte string shrinks to 33.
        assert_eq!(s.remove(0), 'ศ');
        assert_eq!(s.len(), 33);
        assert_eq!(s, "ไทย中华Việt Nam; foobar");
        // Byte 17 is where 'ệ' starts once the first character is gone.
        assert_eq!(s.remove(17), 'ệ');
        assert_eq!(s, "ไทย中华Vit Nam; foobar");
    }

    // 'ศ' occupies several bytes in UTF-8, so byte index 1 falls inside the
    // character; the removal is expected to fail.
    #[test] #[should_fail]
    fn remove_bad() {
        "ศ".to_string().remove(1);
    }

    #[test]
    fn insert() {
        let mut s = "foobar".to_string();
        s.insert(0, 'ệ');
1390
        assert_eq!(s, "ệfoobar");
1391
        s.insert(6, 'ย');
1392
        assert_eq!(s, "ệfooยbar");
1393 1394 1395 1396 1397
    }

    // Index 1 lies past the end of an empty string; the insert is expected to fail.
    #[test] #[should_fail] fn insert_bad1() { "".to_string().insert(1, 't'); }
    // Index 1 falls inside the multi-byte 'ệ'; the insert is expected to fail.
    #[test] #[should_fail] fn insert_bad2() { "ệ".to_string().insert(1, 't'); }

1398 1399 1400 1401 1402 1403 1404 1405 1406
    #[test]
    fn test_slicing() {
        // Covers the full, prefix, suffix, and interior slice forms.
        let text = "foobar".to_string();
        assert_eq!("foobar", text[]);
        assert_eq!("foo", text[..3]);
        assert_eq!("bar", text[3..]);
        assert_eq!("oob", text[1..4]);
    }

1407 1408
    #[test]
    fn test_simple_types() {
1409 1410 1411 1412 1413 1414 1415 1416
        assert_eq!(1i.to_string(), "1");
        assert_eq!((-1i).to_string(), "-1");
        assert_eq!(200u.to_string(), "200");
        assert_eq!(2u8.to_string(), "2");
        assert_eq!(true.to_string(), "true");
        assert_eq!(false.to_string(), "false");
        assert_eq!(().to_string(), "()");
        assert_eq!(("hi".to_string()).to_string(), "hi");
1417 1418 1419 1420 1421
    }

    #[test]
    fn test_vectors() {
        let x: Vec<int> = vec![];
1422 1423 1424
        assert_eq!(x.to_string(), "[]");
        assert_eq!((vec![1i]).to_string(), "[1]");
        assert_eq!((vec![1i, 2, 3]).to_string(), "[1, 2, 3]");
1425
        assert!((vec![vec![], vec![1i], vec![1i, 1]]).to_string() ==
1426
               "[[], [1], [1, 1]]");
1427 1428
    }

1429 1430 1431 1432 1433 1434 1435
    #[test]
    fn test_from_iterator() {
        let s = "ศไทย中华Việt Nam".to_string();
        let t = "ศไทย中华";
        let u = "Việt Nam";

        let a: String = s.chars().collect();
1436
        assert_eq!(s, a);
1437 1438 1439

        let mut b = t.to_string();
        b.extend(u.chars());
1440 1441 1442 1443 1444 1445 1446 1447
        assert_eq!(s, b);

        let c: String = vec![t, u].into_iter().collect();
        assert_eq!(s, c);

        let mut d = t.to_string();
        d.extend(vec![u].into_iter());
        assert_eq!(s, d);
1448 1449
    }

1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465
    // Measures constructing an empty `String` with a requested capacity of 100.
    #[bench]
    fn bench_with_capacity(b: &mut Bencher) {
        b.iter(|| {
            String::with_capacity(100)
        });
    }

    // Measures creating a fresh `String` and appending one mixed-script string
    // slice to it per iteration.
    #[bench]
    fn bench_push_str(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        b.iter(|| {
            let mut r = String::new();
            r.push_str(s);
        });
    }

1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500
    const REPETITIONS: u64 = 10_000;

    // Appends the one-byte string "a" `REPETITIONS` times to a fresh `String`.
    // `b.bytes` is set to the number of bytes appended per iteration.
    #[bench]
    fn bench_push_str_one_byte(b: &mut Bencher) {
        b.bytes = REPETITIONS;
        b.iter(|| {
            let mut r = String::new();
            for _ in range(0, REPETITIONS) {
                r.push_str("a")
            }
        });
    }

    // Same workload as the one-byte `push_str` benchmark, but pushes the
    // character 'a' with `push` instead of a string slice.
    #[bench]
    fn bench_push_char_one_byte(b: &mut Bencher) {
        b.bytes = REPETITIONS;
        b.iter(|| {
            let mut r = String::new();
            for _ in range(0, REPETITIONS) {
                r.push('a')
            }
        });
    }

    // Pushes the two-byte character 'â' `REPETITIONS` times, hence the
    // `REPETITIONS * 2` byte count reported through `b.bytes`.
    #[bench]
    fn bench_push_char_two_bytes(b: &mut Bencher) {
        b.bytes = REPETITIONS * 2;
        b.iter(|| {
            let mut r = String::new();
            for _ in range(0, REPETITIONS) {
                r.push('â')
            }
        });
    }

1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513
    // Measures `String::from_utf8_lossy` on a 100-byte ASCII input; the length
    // is asserted up front so the input size cannot drift unnoticed.
    #[bench]
    fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
        let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
                  Lorem ipsum dolor sit amet, consectetur. ";

        assert_eq!(100, s.len());
        b.iter(|| {
            let _ = String::from_utf8_lossy(s);
        });
    }

    #[bench]
    fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
1514
        let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535
        assert_eq!(100, s.len());
        b.iter(|| {
            let _ = String::from_utf8_lossy(s);
        });
    }

    // Measures `String::from_utf8_lossy` on a short input that mixes ASCII text
    // with the invalid sequences `\xC0\x80` and `\xE6\x83`.
    #[bench]
    fn from_utf8_lossy_invalid(b: &mut Bencher) {
        let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
        b.iter(|| {
            let _ = String::from_utf8_lossy(s);
        });
    }

    // Measures `String::from_utf8_lossy` on 100 copies of the byte 0xF5, which
    // the lossy-conversion test in this module treats as invalid input.
    #[bench]
    fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
        let s = Vec::from_elem(100, 0xF5u8);
        b.iter(|| {
            let _ = String::from_utf8_lossy(s.as_slice());
        });
    }
1536
}