string.rs 38.9 KB
Newer Older
1 2 3 4 5 6 7 8 9
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
10 11
//
// ignore-lexer-test FIXME #15679
12 13 14

//! An owned, growable string that enforces that its contents are valid UTF-8.

15 16
#![stable]

17 18
use core::prelude::*;

19
use core::borrow::{Cow, IntoCow};
20 21
use core::default::Default;
use core::fmt;
22
use core::hash;
23
use core::iter::FromIterator;
24
use core::mem;
25
use core::ops::{self, Deref, Add};
26
use core::ptr;
27
use core::raw::Slice as RawSlice;
A
Alex Crichton 已提交
28 29
use unicode::str as unicode_str;
use unicode::str::Utf16Item;
30

31
use str::{self, CharRange, FromStr, Utf8Error};
32
use vec::{DerefVec, Vec, as_vec};
33

34
/// A growable string stored as a UTF-8 encoded buffer.
35
#[derive(Clone, PartialOrd, Eq, Ord)]
A
Alex Crichton 已提交
36
#[stable]
37
pub struct String {
38 39 40
    vec: Vec<u8>,
}

41 42 43 44 45 46 47 48 49 50 51 52
/// A possible error value from the `String::from_utf8` function.
///
/// Keeps the rejected bytes next to the validation error so the caller can
/// get the original vector back through `into_bytes`.
#[stable]
pub struct FromUtf8Error {
    // The vector that failed validation, handed back unchanged.
    bytes: Vec<u8>,
    // The error `str::from_utf8` reported for `bytes`.
    error: Utf8Error,
}

/// A possible error value from the `String::from_utf16` function.
///
/// Carries no data; `String::from_utf16` returns it when the input contains
/// a lone surrogate.
#[stable]
#[allow(missing_copy_implementations)]
pub struct FromUtf16Error(());

53
impl String {
J
Joseph Crail 已提交
54
    /// Creates a new string buffer initialized with the empty string.
J
Jonas Hietala 已提交
55
    ///
56
    /// # Examples
J
Jonas Hietala 已提交
57 58 59 60
    ///
    /// ```
    /// let mut s = String::new();
    /// ```
61
    #[inline]
A
Alex Crichton 已提交
62
    #[stable]
63 64
    pub fn new() -> String {
        String {
65 66 67 68 69
            vec: Vec::new(),
        }
    }

    /// Creates a new string buffer with the given capacity.
J
Jonas Hietala 已提交
70 71 72
    /// The string will be able to hold exactly `capacity` bytes without
    /// reallocating. If `capacity` is 0, the string will not allocate.
    ///
73
    /// # Examples
J
Jonas Hietala 已提交
74 75 76 77
    ///
    /// ```
    /// let mut s = String::with_capacity(10);
    /// ```
78
    #[inline]
A
Alex Crichton 已提交
79
    #[stable]
80 81
    pub fn with_capacity(capacity: uint) -> String {
        String {
82 83 84 85 86
            vec: Vec::with_capacity(capacity),
        }
    }

    /// Creates a new string buffer from the given string.
J
Jonas Hietala 已提交
87
    ///
88
    /// # Examples
J
Jonas Hietala 已提交
89 90 91 92 93
    ///
    /// ```
    /// let s = String::from_str("hello");
    /// assert_eq!(s.as_slice(), "hello");
    /// ```
94
    #[inline]
A
Alex Crichton 已提交
95
    #[experimental = "needs investigation to see if to_string() can match perf"]
96
    pub fn from_str(string: &str) -> String {
97
        String { vec: ::slice::SliceExt::to_vec(string.as_bytes()) }
98 99
    }

100 101 102
    /// Returns the vector as a string buffer, if possible, taking care not to
    /// copy it.
    ///
A
Alex Crichton 已提交
103 104 105 106
    /// # Failure
    ///
    /// If the given vector is not valid UTF-8, then the original vector and the
    /// corresponding error is returned.
107
    ///
108
    /// # Examples
109 110
    ///
    /// ```rust
A
Alex Crichton 已提交
111 112
    /// use std::str::Utf8Error;
    ///
113
    /// let hello_vec = vec![104, 101, 108, 108, 111];
114 115
    /// let s = String::from_utf8(hello_vec).unwrap();
    /// assert_eq!(s, "hello");
J
Jonas Hietala 已提交
116 117
    ///
    /// let invalid_vec = vec![240, 144, 128];
118 119 120
    /// let s = String::from_utf8(invalid_vec).err().unwrap();
    /// assert_eq!(s.utf8_error(), Utf8Error::TooShort);
    /// assert_eq!(s.into_bytes(), vec![240, 144, 128]);
121
    /// ```
122
    #[inline]
123 124
    #[stable]
    pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
A
Alex Crichton 已提交
125 126
        match str::from_utf8(vec.as_slice()) {
            Ok(..) => Ok(String { vec: vec }),
127
            Err(e) => Err(FromUtf8Error { bytes: vec, error: e })
128 129
        }
    }
130

P
P1start 已提交
131 132
    /// Converts a vector of bytes to a new UTF-8 string.
    /// Any invalid UTF-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
133
    ///
134
    /// # Examples
135 136 137
    ///
    /// ```rust
    /// let input = b"Hello \xF0\x90\x80World";
A
Adolfo Ochagavía 已提交
138
    /// let output = String::from_utf8_lossy(input);
A
Alex Crichton 已提交
139
    /// assert_eq!(output.as_slice(), "Hello \u{FFFD}World");
140
    /// ```
141
    #[stable]
142
    pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
143
        let mut i = 0;
A
Alex Crichton 已提交
144 145
        match str::from_utf8(v) {
            Ok(s) => return Cow::Borrowed(s),
146 147 148 149 150
            Err(e) => {
                if let Utf8Error::InvalidByte(firstbad) = e {
                    i = firstbad;
                }
            }
151 152 153 154 155 156
        }

        static TAG_CONT_U8: u8 = 128u8;
        static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
        let total = v.len();
        fn unsafe_get(xs: &[u8], i: uint) -> u8 {
A
Aaron Turon 已提交
157
            unsafe { *xs.get_unchecked(i) }
158 159 160 161 162 163 164 165 166 167 168 169 170
        }
        fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
            if i >= total {
                0
            } else {
                unsafe_get(xs, i)
            }
        }

        let mut res = String::with_capacity(total);

        if i > 0 {
            unsafe {
171
                res.as_mut_vec().push_all(v[..i])
172 173 174 175 176 177
            };
        }

        // subseqidx is the index of the first byte of the subsequence we're looking at.
        // It's used to copy a bunch of contiguous good codepoints at once instead of copying
        // them one by one.
178
        let mut subseqidx = i;
179 180 181 182 183 184

        while i < total {
            let i_ = i;
            let byte = unsafe_get(v, i);
            i += 1;

185
            macro_rules! error { () => ({
186 187
                unsafe {
                    if subseqidx != i_ {
188
                        res.as_mut_vec().push_all(v[subseqidx..i_]);
189 190
                    }
                    subseqidx = i;
191
                    res.as_mut_vec().push_all(REPLACEMENT);
192
                }
193
            })}
194 195 196 197

            if byte < 128u8 {
                // subseqidx handles this
            } else {
A
Alex Crichton 已提交
198
                let w = unicode_str::utf8_char_width(byte);
199 200 201 202 203 204 205 206 207 208 209

                match w {
                    2 => {
                        if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
                            error!();
                            continue;
                        }
                        i += 1;
                    }
                    3 => {
                        match (byte, safe_get(v, i, total)) {
210 211 212 213
                            (0xE0         , 0xA0 ... 0xBF) => (),
                            (0xE1 ... 0xEC, 0x80 ... 0xBF) => (),
                            (0xED         , 0x80 ... 0x9F) => (),
                            (0xEE ... 0xEF, 0x80 ... 0xBF) => (),
214 215 216 217 218 219 220 221 222 223 224 225 226 227
                            _ => {
                                error!();
                                continue;
                            }
                        }
                        i += 1;
                        if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
                            error!();
                            continue;
                        }
                        i += 1;
                    }
                    4 => {
                        match (byte, safe_get(v, i, total)) {
228 229 230
                            (0xF0         , 0x90 ... 0xBF) => (),
                            (0xF1 ... 0xF3, 0x80 ... 0xBF) => (),
                            (0xF4         , 0x80 ... 0x8F) => (),
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
                            _ => {
                                error!();
                                continue;
                            }
                        }
                        i += 1;
                        if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
                            error!();
                            continue;
                        }
                        i += 1;
                        if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
                            error!();
                            continue;
                        }
                        i += 1;
                    }
                    _ => {
                        error!();
                        continue;
                    }
                }
            }
        }
        if subseqidx < total {
            unsafe {
257
                res.as_mut_vec().push_all(v[subseqidx..total])
258 259
            };
        }
A
Alex Crichton 已提交
260
        Cow::Owned(res)
261 262
    }

A
Adolfo Ochagavía 已提交
263
    /// Decode a UTF-16 encoded vector `v` into a `String`, returning `None`
A
Adolfo Ochagavía 已提交
264 265
    /// if `v` contains any invalid data.
    ///
266
    /// # Examples
A
Adolfo Ochagavía 已提交
267 268
    ///
    /// ```rust
A
Adolfo Ochagavía 已提交
269
    /// // 𝄞music
N
Nick Cameron 已提交
270 271
    /// let mut v = &mut [0xD834, 0xDD1E, 0x006d, 0x0075,
    ///                   0x0073, 0x0069, 0x0063];
272 273
    /// assert_eq!(String::from_utf16(v).unwrap(),
    ///            "𝄞music".to_string());
A
Adolfo Ochagavía 已提交
274
    ///
A
Adolfo Ochagavía 已提交
275
    /// // 𝄞mu<invalid>ic
A
Adolfo Ochagavía 已提交
276
    /// v[4] = 0xD800;
277
    /// assert!(String::from_utf16(v).is_err());
A
Adolfo Ochagavía 已提交
278
    /// ```
279 280
    #[stable]
    pub fn from_utf16(v: &[u16]) -> Result<String, FromUtf16Error> {
281
        let mut s = String::with_capacity(v.len());
A
Alex Crichton 已提交
282
        for c in unicode_str::utf16_items(v) {
A
Adolfo Ochagavía 已提交
283
            match c {
A
Alex Crichton 已提交
284
                Utf16Item::ScalarValue(c) => s.push(c),
285
                Utf16Item::LoneSurrogate(_) => return Err(FromUtf16Error(())),
A
Adolfo Ochagavía 已提交
286 287
            }
        }
288
        Ok(s)
A
Adolfo Ochagavía 已提交
289
    }
290

291 292 293
    /// Decode a UTF-16 encoded vector `v` into a string, replacing
    /// invalid data with the replacement character (U+FFFD).
    ///
294 295
    /// # Examples
    ///
296
    /// ```rust
A
Adolfo Ochagavía 已提交
297
    /// // 𝄞mus<invalid>ic<invalid>
N
Nick Cameron 已提交
298 299 300
    /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075,
    ///           0x0073, 0xDD1E, 0x0069, 0x0063,
    ///           0xD834];
301 302
    ///
    /// assert_eq!(String::from_utf16_lossy(v),
A
Alex Crichton 已提交
303
    ///            "𝄞mus\u{FFFD}ic\u{FFFD}".to_string());
304
    /// ```
A
Alex Crichton 已提交
305
    #[stable]
306
    pub fn from_utf16_lossy(v: &[u16]) -> String {
A
Alex Crichton 已提交
307
        unicode_str::utf16_items(v).map(|c| c.to_char_lossy()).collect()
308
    }
A
Adolfo Ochagavía 已提交
309

310 311 312 313 314 315
    /// Creates a new `String` from a length, capacity, and pointer.
    ///
    /// This is unsafe because:
    /// * We call `Vec::from_raw_parts` to get a `Vec<u8>`;
    /// * We assume that the `Vec` contains valid UTF-8.
    #[inline]
316
    #[stable]
317 318 319 320 321 322 323 324 325 326
    pub unsafe fn from_raw_parts(buf: *mut u8, length: uint, capacity: uint) -> String {
        String {
            vec: Vec::from_raw_parts(buf, length, capacity),
        }
    }

    /// Converts a vector of bytes to a new `String` without checking if
    /// it contains valid UTF-8. This is unsafe because it assumes that
    /// the UTF-8-ness of the vector has already been validated.
    #[inline]
327
    #[stable]
328 329 330 331
    pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String {
        String { vec: bytes }
    }

332
    /// Return the underlying byte buffer, encoded as UTF-8.
J
Jonas Hietala 已提交
333
    ///
334
    /// # Examples
J
Jonas Hietala 已提交
335 336 337 338 339 340
    ///
    /// ```
    /// let s = String::from_str("hello");
    /// let bytes = s.into_bytes();
    /// assert_eq!(bytes, vec![104, 101, 108, 108, 111]);
    /// ```
341
    #[inline]
A
Alex Crichton 已提交
342
    #[stable]
343 344 345 346
    pub fn into_bytes(self) -> Vec<u8> {
        self.vec
    }

347
    /// Pushes the given string onto this string buffer.
J
Jonas Hietala 已提交
348
    ///
349
    /// # Examples
J
Jonas Hietala 已提交
350 351 352 353 354 355
    ///
    /// ```
    /// let mut s = String::from_str("foo");
    /// s.push_str("bar");
    /// assert_eq!(s.as_slice(), "foobar");
    /// ```
356
    #[inline]
357
    #[stable]
358 359 360 361
    pub fn push_str(&mut self, string: &str) {
        self.vec.push_all(string.as_bytes())
    }

362 363
    /// Returns the number of bytes that this string buffer can hold without
    /// reallocating.
A
Alex Crichton 已提交
364
    ///
365
    /// # Examples
A
Alex Crichton 已提交
366 367 368
    ///
    /// ```
    /// let s = String::with_capacity(10);
369
    /// assert!(s.capacity() >= 10);
A
Alex Crichton 已提交
370 371
    /// ```
    #[inline]
372
    #[stable]
A
Alex Crichton 已提交
373 374 375 376
    pub fn capacity(&self) -> uint {
        self.vec.capacity()
    }

377 378 379
    /// Reserves capacity for at least `additional` more bytes to be inserted
    /// in the given `String`. The collection may reserve more space to avoid
    /// frequent reallocations.
380 381 382 383
    ///
    /// # Panics
    ///
    /// Panics if the new capacity overflows `uint`.
J
Jonas Hietala 已提交
384
    ///
385
    /// # Examples
J
Jonas Hietala 已提交
386 387 388 389
    ///
    /// ```
    /// let mut s = String::new();
    /// s.reserve(10);
390
    /// assert!(s.capacity() >= 10);
J
Jonas Hietala 已提交
391
    /// ```
392
    #[inline]
393
    #[stable]
394 395
    pub fn reserve(&mut self, additional: uint) {
        self.vec.reserve(additional)
396 397
    }

398 399 400
    /// Reserves the minimum capacity for exactly `additional` more bytes to be
    /// inserted in the given `String`. Does nothing if the capacity is already
    /// sufficient.
401
    ///
402 403 404
    /// Note that the allocator may give the collection more space than it
    /// requests. Therefore capacity can not be relied upon to be precisely
    /// minimal. Prefer `reserve` if future insertions are expected.
405 406 407 408
    ///
    /// # Panics
    ///
    /// Panics if the new capacity overflows `uint`.
J
Jonas Hietala 已提交
409
    ///
410
    /// # Examples
J
Jonas Hietala 已提交
411 412 413
    ///
    /// ```
    /// let mut s = String::new();
414 415
    /// s.reserve(10);
    /// assert!(s.capacity() >= 10);
J
Jonas Hietala 已提交
416
    /// ```
417
    #[inline]
418
    #[stable]
419 420
    pub fn reserve_exact(&mut self, additional: uint) {
        self.vec.reserve_exact(additional)
421 422 423
    }

    /// Shrinks the capacity of this string buffer to match its length.
J
Jonas Hietala 已提交
424
    ///
425
    /// # Examples
J
Jonas Hietala 已提交
426 427 428 429
    ///
    /// ```
    /// let mut s = String::from_str("foo");
    /// s.reserve(100);
430
    /// assert!(s.capacity() >= 100);
J
Jonas Hietala 已提交
431
    /// s.shrink_to_fit();
432
    /// assert_eq!(s.capacity(), 3);
J
Jonas Hietala 已提交
433
    /// ```
434
    #[inline]
435
    #[stable]
436 437 438 439 440
    pub fn shrink_to_fit(&mut self) {
        self.vec.shrink_to_fit()
    }

    /// Adds the given character to the end of the string.
J
Jonas Hietala 已提交
441
    ///
442
    /// # Examples
J
Jonas Hietala 已提交
443 444 445
    ///
    /// ```
    /// let mut s = String::from_str("abc");
A
Alex Crichton 已提交
446 447 448
    /// s.push('1');
    /// s.push('2');
    /// s.push('3');
J
Jonas Hietala 已提交
449 450
    /// assert_eq!(s.as_slice(), "abc123");
    /// ```
451
    #[inline]
452
    #[stable]
A
Alex Crichton 已提交
453
    pub fn push(&mut self, ch: char) {
454 455 456 457 458
        if (ch as u32) < 0x80 {
            self.vec.push(ch as u8);
            return;
        }

459
        let cur_len = self.len();
460
        // This may use up to 4 bytes.
461
        self.vec.reserve(4);
462

463
        unsafe {
464 465
            // Attempt to not use an intermediate buffer by just pushing bytes
            // directly onto this string.
466
            let slice = RawSlice {
467 468 469
                data: self.vec.as_ptr().offset(cur_len as int),
                len: 4,
            };
470
            let used = ch.encode_utf8(mem::transmute(slice)).unwrap_or(0);
471 472 473 474 475
            self.vec.set_len(cur_len + used);
        }
    }

    /// Works with the underlying buffer as a byte slice.
J
Jonas Hietala 已提交
476
    ///
477
    /// # Examples
J
Jonas Hietala 已提交
478 479 480
    ///
    /// ```
    /// let s = String::from_str("hello");
N
Nick Cameron 已提交
481 482
    /// let b: &[_] = &[104, 101, 108, 108, 111];
    /// assert_eq!(s.as_bytes(), b);
J
Jonas Hietala 已提交
483
    /// ```
484
    #[inline]
A
Alex Crichton 已提交
485
    #[stable]
486 487 488 489
    pub fn as_bytes<'a>(&'a self) -> &'a [u8] {
        self.vec.as_slice()
    }

P
P1start 已提交
490
    /// Shortens a string to the specified length.
J
Jonas Hietala 已提交
491
    ///
492
    /// # Panics
J
Jonas Hietala 已提交
493
    ///
494
    /// Panics if `new_len` > current length,
495
    /// or if `new_len` is not a character boundary.
J
Jonas Hietala 已提交
496
    ///
497
    /// # Examples
J
Jonas Hietala 已提交
498 499 500 501 502 503
    ///
    /// ```
    /// let mut s = String::from_str("hello");
    /// s.truncate(2);
    /// assert_eq!(s.as_slice(), "he");
    /// ```
504
    #[inline]
505
    #[stable]
506
    pub fn truncate(&mut self, new_len: uint) {
507
        assert!(self.is_char_boundary(new_len));
508
        self.vec.truncate(new_len)
509 510
    }

J
Jonas Hietala 已提交
511 512 513
    /// Removes the last character from the string buffer and returns it.
    /// Returns `None` if this string buffer is empty.
    ///
514
    /// # Examples
J
Jonas Hietala 已提交
515 516 517
    ///
    /// ```
    /// let mut s = String::from_str("foo");
A
Alex Crichton 已提交
518 519 520 521
    /// assert_eq!(s.pop(), Some('o'));
    /// assert_eq!(s.pop(), Some('o'));
    /// assert_eq!(s.pop(), Some('f'));
    /// assert_eq!(s.pop(), None);
J
Jonas Hietala 已提交
522
    /// ```
523
    #[inline]
524
    #[stable]
A
Alex Crichton 已提交
525
    pub fn pop(&mut self) -> Option<char> {
526 527 528 529 530
        let len = self.len();
        if len == 0 {
            return None
        }

531
        let CharRange {ch, next} = self.char_range_at_reverse(len);
532 533 534 535 536 537
        unsafe {
            self.vec.set_len(next);
        }
        Some(ch)
    }

538
    /// Removes the character from the string buffer at byte position `idx` and
539
    /// returns it.
540 541 542
    ///
    /// # Warning
    ///
543
    /// This is an O(n) operation as it requires copying every element in the
544 545
    /// buffer.
    ///
S
Steve Klabnik 已提交
546
    /// # Panics
547
    ///
548 549
    /// If `idx` does not lie on a character boundary, or if it is out of
    /// bounds, then this function will panic.
J
Jonas Hietala 已提交
550
    ///
551
    /// # Examples
J
Jonas Hietala 已提交
552 553 554
    ///
    /// ```
    /// let mut s = String::from_str("foo");
555 556 557
    /// assert_eq!(s.remove(0), 'f');
    /// assert_eq!(s.remove(1), 'o');
    /// assert_eq!(s.remove(0), 'o');
J
Jonas Hietala 已提交
558
    /// ```
559 560
    #[stable]
    pub fn remove(&mut self, idx: uint) -> char {
561
        let len = self.len();
562
        assert!(idx <= len);
563

564
        let CharRange { ch, next } = self.char_range_at(idx);
565
        unsafe {
566 567 568 569
            ptr::copy_memory(self.vec.as_mut_ptr().offset(idx as int),
                             self.vec.as_ptr().offset(next as int),
                             len - next);
            self.vec.set_len(len - (next - idx));
570
        }
571
        ch
572
    }
573

574 575 576 577
    /// Insert a character into the string buffer at byte position `idx`.
    ///
    /// # Warning
    ///
578
    /// This is an O(n) operation as it requires copying every element in the
579 580
    /// buffer.
    ///
S
Steve Klabnik 已提交
581
    /// # Panics
582 583
    ///
    /// If `idx` does not lie on a character boundary or is out of bounds, then
S
Steve Klabnik 已提交
584
    /// this function will panic.
585
    #[stable]
586 587 588
    pub fn insert(&mut self, idx: uint, ch: char) {
        let len = self.len();
        assert!(idx <= len);
589
        assert!(self.is_char_boundary(idx));
590
        self.vec.reserve(4);
591
        let mut bits = [0; 4];
N
Nick Cameron 已提交
592
        let amt = ch.encode_utf8(&mut bits).unwrap();
593 594 595 596 597 598 599 600 601 602 603 604

        unsafe {
            ptr::copy_memory(self.vec.as_mut_ptr().offset((idx + amt) as int),
                             self.vec.as_ptr().offset(idx as int),
                             len - idx);
            ptr::copy_memory(self.vec.as_mut_ptr().offset(idx as int),
                             bits.as_ptr(),
                             amt);
            self.vec.set_len(len + amt);
        }
    }

605 606
    /// Views the string buffer as a mutable sequence of bytes.
    ///
J
Jonas Hietala 已提交
607 608 609
    /// This is unsafe because it does not check
    /// to ensure that the resulting string will be valid UTF-8.
    ///
610
    /// # Examples
J
Jonas Hietala 已提交
611 612 613 614 615 616 617 618 619 620
    ///
    /// ```
    /// let mut s = String::from_str("hello");
    /// unsafe {
    ///     let vec = s.as_mut_vec();
    ///     assert!(vec == &mut vec![104, 101, 108, 108, 111]);
    ///     vec.reverse();
    /// }
    /// assert_eq!(s.as_slice(), "olleh");
    /// ```
621
    #[stable]
622 623 624
    pub unsafe fn as_mut_vec<'a>(&'a mut self) -> &'a mut Vec<u8> {
        &mut self.vec
    }
625

626 627
    /// Return the number of bytes in this string.
    ///
628
    /// # Examples
629 630 631 632 633
    ///
    /// ```
    /// let a = "foo".to_string();
    /// assert_eq!(a.len(), 3);
    /// ```
634
    #[inline]
A
Alex Crichton 已提交
635
    #[stable]
636
    pub fn len(&self) -> uint { self.vec.len() }
637

638 639
    /// Returns true if the string contains no bytes
    ///
640
    /// # Examples
641 642 643 644 645 646 647
    ///
    /// ```
    /// let mut v = String::new();
    /// assert!(v.is_empty());
    /// v.push('a');
    /// assert!(!v.is_empty());
    /// ```
648
    #[stable]
649 650 651 652
    pub fn is_empty(&self) -> bool { self.len() == 0 }

    /// Truncates the string, returning it to 0 length.
    ///
653
    /// # Examples
654 655 656 657 658 659
    ///
    /// ```
    /// let mut s = "foo".to_string();
    /// s.clear();
    /// assert!(s.is_empty());
    /// ```
660
    #[inline]
A
Alex Crichton 已提交
661
    #[stable]
662
    pub fn clear(&mut self) {
663 664 665 666
        self.vec.clear()
    }
}

667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689
impl FromUtf8Error {
    /// Consume this error, returning the bytes that were attempted to make a
    /// `String` with.
    #[stable]
    pub fn into_bytes(self) -> Vec<u8> { self.bytes }

    /// Access the underlying UTF8-error that was the cause of this error.
    #[stable]
    pub fn utf8_error(&self) -> Utf8Error { self.error }
}

impl fmt::Show for FromUtf8Error {
    /// Formats as the wrapped `Utf8Error`; the rejected bytes are not shown.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let cause = &self.error;
        fmt::Show::fmt(cause, f)
    }
}

impl fmt::Show for FromUtf16Error {
    /// Writes a fixed message; the error itself carries no details.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = "invalid utf-16: lone surrogate found";
        fmt::Show::fmt(message, f)
    }
}

690
#[stable]
691
impl FromIterator<char> for String {
J
Jorge Aparicio 已提交
692
    fn from_iter<I:Iterator<Item=char>>(iterator: I) -> String {
693
        let mut buf = String::new();
694 695 696 697 698
        buf.extend(iterator);
        buf
    }
}

699
#[stable]
700
impl<'a> FromIterator<&'a str> for String {
J
Jorge Aparicio 已提交
701
    fn from_iter<I:Iterator<Item=&'a str>>(iterator: I) -> String {
702 703 704 705 706 707
        let mut buf = String::new();
        buf.extend(iterator);
        buf
    }
}

G
gamazeps 已提交
708 709
#[experimental = "waiting on Extend stabilization"]
impl Extend<char> for String {
J
Jorge Aparicio 已提交
710
    fn extend<I:Iterator<Item=char>>(&mut self, mut iterator: I) {
711 712
        let (lower_bound, _) = iterator.size_hint();
        self.reserve(lower_bound);
713
        for ch in iterator {
714
            self.push(ch)
715 716 717 718
        }
    }
}

719 720
#[experimental = "waiting on Extend stabilization"]
impl<'a> Extend<&'a str> for String {
J
Jorge Aparicio 已提交
721
    fn extend<I: Iterator<Item=&'a str>>(&mut self, mut iterator: I) {
722 723 724 725 726 727 728 729 730
        // A guess that at least one byte per iterator element will be needed.
        let (lower_bound, _) = iterator.size_hint();
        self.reserve(lower_bound);
        for s in iterator {
            self.push_str(s)
        }
    }
}

A
Aaron Turon 已提交
731
#[stable]
J
Jorge Aparicio 已提交
732 733 734 735 736 737 738 739 740
impl PartialEq for String {
    #[inline]
    fn eq(&self, other: &String) -> bool { PartialEq::eq(&**self, &**other) }
    #[inline]
    fn ne(&self, other: &String) -> bool { PartialEq::ne(&**self, &**other) }
}

macro_rules! impl_eq {
    ($lhs:ty, $rhs: ty) => {
A
Aaron Turon 已提交
741
        #[stable]
J
Jorge Aparicio 已提交
742 743 744 745 746 747 748
        impl<'a> PartialEq<$rhs> for $lhs {
            #[inline]
            fn eq(&self, other: &$rhs) -> bool { PartialEq::eq(&**self, &**other) }
            #[inline]
            fn ne(&self, other: &$rhs) -> bool { PartialEq::ne(&**self, &**other) }
        }

A
Aaron Turon 已提交
749
        #[stable]
J
Jorge Aparicio 已提交
750 751 752 753 754 755 756 757 758 759
        impl<'a> PartialEq<$lhs> for $rhs {
            #[inline]
            fn eq(&self, other: &$lhs) -> bool { PartialEq::eq(&**self, &**other) }
            #[inline]
            fn ne(&self, other: &$lhs) -> bool { PartialEq::ne(&**self, &**other) }
        }

    }
}

760 761
impl_eq! { String, &'a str }
impl_eq! { CowString<'a>, String }
J
Jorge Aparicio 已提交
762

A
Aaron Turon 已提交
763
#[stable]
J
Jorge Aparicio 已提交
764 765 766 767 768 769 770
impl<'a, 'b> PartialEq<&'b str> for CowString<'a> {
    #[inline]
    fn eq(&self, other: &&'b str) -> bool { PartialEq::eq(&**self, &**other) }
    #[inline]
    fn ne(&self, other: &&'b str) -> bool { PartialEq::ne(&**self, &**other) }
}

A
Aaron Turon 已提交
771
#[stable]
J
Jorge Aparicio 已提交
772 773 774 775 776 777 778
impl<'a, 'b> PartialEq<CowString<'a>> for &'b str {
    #[inline]
    fn eq(&self, other: &CowString<'a>) -> bool { PartialEq::eq(&**self, &**other) }
    #[inline]
    fn ne(&self, other: &CowString<'a>) -> bool { PartialEq::ne(&**self, &**other) }
}

A
Alex Crichton 已提交
779
#[experimental = "waiting on Str stabilization"]
780
impl Str for String {
781
    #[inline]
A
Alex Crichton 已提交
782
    #[stable]
783
    fn as_slice<'a>(&'a self) -> &'a str {
A
Alex Crichton 已提交
784
        unsafe { mem::transmute(self.vec.as_slice()) }
785
    }
786 787
}

A
Alex Crichton 已提交
788
#[stable]
789
impl Default for String {
790
    #[stable]
791 792
    fn default() -> String {
        String::new()
793 794 795
    }
}

A
Alex Crichton 已提交
796
#[experimental = "waiting on Show stabilization"]
797
impl fmt::Show for String {
798
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
A
Alex Crichton 已提交
799
        (**self).fmt(f)
800 801 802
    }
}

A
Alex Crichton 已提交
803
#[experimental = "waiting on Hash stabilization"]
804
impl<H: hash::Writer> hash::Hash<H> for String {
805 806
    #[inline]
    fn hash(&self, hasher: &mut H) {
A
Alex Crichton 已提交
807
        (**self).hash(hasher)
808 809 810
    }
}

811
#[unstable = "recent addition, needs more experience"]
J
Jorge Aparicio 已提交
812 813 814
impl<'a> Add<&'a str> for String {
    type Output = String;

J
Jorge Aparicio 已提交
815 816 817 818 819 820
    fn add(mut self, other: &str) -> String {
        self.push_str(other);
        self
    }
}

N
Nick Cameron 已提交
821 822 823
impl ops::Slice<uint, str> for String {
    #[inline]
    fn as_slice_<'a>(&'a self) -> &'a str {
A
Alex Crichton 已提交
824
        unsafe { mem::transmute(self.vec.as_slice()) }
N
Nick Cameron 已提交
825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
    }

    #[inline]
    fn slice_from_or_fail<'a>(&'a self, from: &uint) -> &'a str {
        self[][*from..]
    }

    #[inline]
    fn slice_to_or_fail<'a>(&'a self, to: &uint) -> &'a str {
        self[][..*to]
    }

    #[inline]
    fn slice_or_fail<'a>(&'a self, from: &uint, to: &uint) -> &'a str {
        self[][*from..*to]
    }
}
842

843
#[stable]
844 845 846
impl ops::Deref for String {
    type Target = str;

A
Alex Crichton 已提交
847 848 849
    fn deref<'a>(&'a self) -> &'a str {
        unsafe { mem::transmute(self.vec[]) }
    }
850 851
}

852 853 854 855 856 857
/// Wrapper type providing a `&String` reference via `Deref`.
///
/// Produced by `as_string`.
#[experimental]
pub struct DerefString<'a> {
    // Vector wrapper built by `as_vec` from the source string's bytes.
    x: DerefVec<'a, u8>
}

858 859 860
impl<'a> Deref for DerefString<'a> {
    type Target = String;

    /// Exposes the wrapped vector as a `&String`.
    fn deref<'b>(&'b self) -> &'b String {
        // NOTE(review): this reinterprets the reference obtained from
        // `&*self.x` (presumably `&Vec<u8>` — confirm against `DerefVec`) as
        // `&String`, which relies on `String` having the same layout as its
        // single `vec: Vec<u8>` field.
        unsafe { mem::transmute(&*self.x) }
    }
}

/// Convert a string slice to a wrapper type providing a `&String` reference.
867 868 869 870 871 872 873 874 875 876 877 878 879
///
/// # Examples
///
/// ```
/// use std::string::as_string;
///
/// fn string_consumer(s: String) {
///     assert_eq!(s, "foo".to_string());
/// }
///
/// let string = as_string("foo").clone();
/// string_consumer(string);
/// ```
880 881 882 883 884
#[experimental]
pub fn as_string<'a>(x: &'a str) -> DerefString<'a> {
    DerefString { x: as_vec(x.as_bytes()) }
}

B
Brendan Zabarauskas 已提交
885 886 887 888 889 890 891
impl FromStr for String {
    /// Parsing a `String` from a string slice always succeeds: the result is
    /// `Some` holding `String::from_str(s)`.
    #[inline]
    fn from_str(s: &str) -> Option<String> {
        Some(String::from_str(s))
    }
}

892 893 894 895 896 897 898
/// A generic trait for converting a value to a string.
///
/// This file implements it for every type that implements `fmt::Show`.
pub trait ToString {
    /// Converts the value of `self` to an owned string.
    fn to_string(&self) -> String;
}

impl<T: fmt::Show> ToString for T {
899
    fn to_string(&self) -> String {
900 901 902 903 904
        use core::fmt::Writer;
        let mut buf = String::new();
        let _ = buf.write_fmt(format_args!("{}", self));
        buf.shrink_to_fit();
        buf
905
    }
906 907
}

908 909 910 911 912 913 914 915 916 917 918 919
impl IntoCow<'static, String, str> for String {
    /// Wraps the owned string in `Cow::Owned` without copying it.
    fn into_cow(self) -> CowString<'static> {
        Cow::Owned(self)
    }
}

impl<'a> IntoCow<'a, String, str> for &'a str {
    /// Wraps the slice in `Cow::Borrowed`; nothing is allocated.
    fn into_cow(self) -> CowString<'a> {
        Cow::Borrowed(self)
    }
}

A
Alex Crichton 已提交
920 921 922 923 924 925 926 927 928 929 930
/// A clone-on-write string: either a borrowed `str` or an owned `String`.
#[stable]
pub type CowString<'a> = Cow<'a, String, str>;

impl<'a> Str for CowString<'a> {
    /// Returns the `str` view, whichever variant is held.
    #[inline]
    fn as_slice<'b>(&'b self) -> &'b str {
        // `**self` goes through the `Cow`'s `Deref` to reach the `str`.
        (**self).as_slice()
    }
}

931 932 933 934 935 936 937
impl fmt::Writer for String {
    /// Appends `s` to the end of this string.
    ///
    /// Always returns `Ok(())`.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.push_str(s);
        Ok(())
    }
}

938 939
// Unit tests and benchmarks for `String`, `as_string` and `ToString`.
#[cfg(test)]
mod tests {
    use prelude::*;
    use test::Bencher;

    use str::Utf8Error;
    use core::iter::repeat;
    use super::{as_string, CowString};

    // `as_string` must expose the same contents as the slice it wraps.
    #[test]
    fn test_as_string() {
        let x = "foo";
        assert_eq!(x, as_string(x).as_slice());
    }

    // Parsing a `&str` into a `String` always succeeds.
    #[test]
    fn test_from_str() {
        let owned: Option<::std::string::String> = "string".parse();
        assert_eq!(owned.as_ref().map(|s| s.as_slice()), Some("string"));
    }

    // Valid input round-trips; invalid input reports the error and hands the
    // original bytes back.
    #[test]
    fn test_from_utf8() {
        let xs = b"hello".to_vec();
        assert_eq!(String::from_utf8(xs).unwrap(),
                   String::from_str("hello"));

        let xs = "ศไทย中华Việt Nam".as_bytes().to_vec();
        assert_eq!(String::from_utf8(xs).unwrap(),
                   String::from_str("ศไทย中华Việt Nam"));

        let xs = b"hello\xFF".to_vec();
        let err = String::from_utf8(xs).err().unwrap();
        assert_eq!(err.utf8_error(), Utf8Error::TooShort);
        assert_eq!(err.into_bytes(), b"hello\xff".to_vec());
    }

    // Invalid sequences are replaced with U+FFFD; valid input is unchanged.
    #[test]
    fn test_from_utf8_lossy() {
        let xs = b"hello";
        let ys: CowString = "hello".into_cow();
        assert_eq!(String::from_utf8_lossy(xs), ys);

        let xs = "ศไทย中华Việt Nam".as_bytes();
        let ys: CowString = "ศไทย中华Việt Nam".into_cow();
        assert_eq!(String::from_utf8_lossy(xs), ys);

        let xs = b"Hello\xC2 There\xFF Goodbye";
        assert_eq!(String::from_utf8_lossy(xs),
                   String::from_str("Hello\u{FFFD} There\u{FFFD} Goodbye").into_cow());

        let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
        assert_eq!(String::from_utf8_lossy(xs),
                   String::from_str("Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye").into_cow());

        let xs = b"\xF5foo\xF5\x80bar";
        assert_eq!(String::from_utf8_lossy(xs),
                   String::from_str("\u{FFFD}foo\u{FFFD}\u{FFFD}bar").into_cow());

        let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
        assert_eq!(String::from_utf8_lossy(xs),
                   String::from_str("\u{FFFD}foo\u{FFFD}bar\u{FFFD}baz").into_cow());

        let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
        assert_eq!(String::from_utf8_lossy(xs),
                   String::from_str("\u{FFFD}foo\u{FFFD}bar\u{FFFD}\u{FFFD}baz").into_cow());

        let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
        assert_eq!(String::from_utf8_lossy(xs), String::from_str("\u{FFFD}\u{FFFD}\u{FFFD}\u{FFFD}\
                                               foo\u{10000}bar").into_cow());

        // surrogates
        let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
        assert_eq!(String::from_utf8_lossy(xs), String::from_str("\u{FFFD}\u{FFFD}\u{FFFD}foo\
                                               \u{FFFD}\u{FFFD}\u{FFFD}bar").into_cow());
    }

    // Each pair holds a string and its expected UTF-16 encoding; conversions
    // are checked in both directions.
    #[test]
    fn test_from_utf16() {
        let pairs =
            [(String::from_str("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n"),
              vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
                0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
                0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
                0xd800_u16, 0xdf30_u16, 0x000a_u16]),

             (String::from_str("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n"),
              vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
                0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
                0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
                0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
                0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
                0x000a_u16]),

             (String::from_str("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n"),
              vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
                0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
                0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
                0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
                0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
                0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
                0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),

             (String::from_str("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n"),
              vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
                0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
                0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
                0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
                0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
                0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
                0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
                0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
                0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
                0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
                0x000a_u16 ]),
             // Issue #12318, even-numbered non-BMP planes
             (String::from_str("\u{20000}"),
              vec![0xD840, 0xDC00])];

        for p in pairs.iter() {
            let (s, u) = (*p).clone();
            let s_as_utf16 = s.utf16_units().collect::<Vec<u16>>();
            let u_as_string = String::from_utf16(u.as_slice()).unwrap();

            assert!(::unicode::str::is_utf16(u.as_slice()));
            assert_eq!(s_as_utf16, u);

            assert_eq!(u_as_string, s);
            assert_eq!(String::from_utf16_lossy(u.as_slice()), s);

            assert_eq!(String::from_utf16(s_as_utf16.as_slice()).unwrap(), s);
            assert_eq!(u_as_string.utf16_units().collect::<Vec<u16>>(), u);
        }
    }

    // Unpaired surrogates must be rejected by the strict decoder.
    #[test]
    fn test_utf16_invalid() {
        // completely positive cases tested above.
        // lead + eof
        assert!(String::from_utf16(&[0xD800]).is_err());
        // lead + lead
        assert!(String::from_utf16(&[0xD800, 0xD800]).is_err());

        // isolated trail
        assert!(String::from_utf16(&[0x0061, 0xDC00]).is_err());

        // general
        assert!(String::from_utf16(&[0xD800, 0xd801, 0xdc8b, 0xD800]).is_err());
    }

    // Unpaired surrogates become U+FFFD in the lossy decoder.
    #[test]
    fn test_from_utf16_lossy() {
        // completely positive cases tested above.
        // lead + eof
        assert_eq!(String::from_utf16_lossy(&[0xD800]), String::from_str("\u{FFFD}"));
        // lead + lead
        assert_eq!(String::from_utf16_lossy(&[0xD800, 0xD800]),
                   String::from_str("\u{FFFD}\u{FFFD}"));

        // isolated trail
        assert_eq!(String::from_utf16_lossy(&[0x0061, 0xDC00]), String::from_str("a\u{FFFD}"));

        // general
        assert_eq!(String::from_utf16_lossy(&[0xD800, 0xd801, 0xdc8b, 0xD800]),
                   String::from_str("\u{FFFD}𐒋\u{FFFD}"));
    }

    // Bytes pushed through the underlying vector show up in the string.
    #[test]
    fn test_push_bytes() {
        let mut s = String::from_str("ABC");
        unsafe {
            let mv = s.as_mut_vec();
            mv.push_all(&[b'D']);
        }
        assert_eq!(s, "ABCD");
    }

    #[test]
    fn test_push_str() {
        let mut s = String::new();
        s.push_str("");
        assert_eq!(s.slice_from(0), "");
        s.push_str("abc");
        assert_eq!(s.slice_from(0), "abc");
        s.push_str("ประเทศไทย中华Việt Nam");
        assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
    }

    // Pushes characters of every UTF-8 encoded width.
    #[test]
    fn test_push() {
        let mut data = String::from_str("ประเทศไทย中");
        data.push('华');
        data.push('b'); // 1 byte
        data.push('¢'); // 2 byte
        data.push('€'); // 3 byte
        data.push('𤭢'); // 4 byte
        assert_eq!(data, "ประเทศไทย中华b¢€𤭢");
    }

    // Pops characters of every UTF-8 encoded width.
    #[test]
    fn test_pop() {
        let mut data = String::from_str("ประเทศไทย中华b¢€𤭢");
        assert_eq!(data.pop().unwrap(), '𤭢'); // 4 bytes
        assert_eq!(data.pop().unwrap(), '€'); // 3 bytes
        assert_eq!(data.pop().unwrap(), '¢'); // 2 bytes
        assert_eq!(data.pop().unwrap(), 'b'); // 1 bytes
        assert_eq!(data.pop().unwrap(), '华');
        assert_eq!(data, "ประเทศไทย中");
    }

    #[test]
    fn test_str_truncate() {
        let mut s = String::from_str("12345");
        s.truncate(5);
        assert_eq!(s, "12345");
        s.truncate(3);
        assert_eq!(s, "123");
        s.truncate(0);
        assert_eq!(s, "");

        // Truncating and then pushing a shorter tail must leave the buffer
        // pointer unchanged.
        let mut s = String::from_str("12345");
        let p = s.as_ptr();
        s.truncate(3);
        s.push_str("6");
        let p_ = s.as_ptr();
        assert_eq!(p_, p);
    }

    // Truncating past the end of the string is expected to fail.
    #[test]
    #[should_fail]
    fn test_str_truncate_invalid_len() {
        let mut s = String::from_str("12345");
        s.truncate(6);
    }

    // Truncating in the middle of a multi-byte character is expected to fail.
    #[test]
    #[should_fail]
    fn test_str_truncate_split_codepoint() {
        let mut s = String::from_str("\u{FC}"); // ü
        s.truncate(1);
    }

    #[test]
    fn test_str_clear() {
        let mut s = String::from_str("12345");
        s.clear();
        assert_eq!(s.len(), 0);
        assert_eq!(s, "");
    }

    #[test]
    fn test_str_add() {
        let a = String::from_str("12345");
        let b = a + "2";
        let b = b + "2";
        assert_eq!(b.len(), 7);
        assert_eq!(b, "1234522");
    }

    // `remove` takes a byte index and returns the character removed there.
    #[test]
    fn remove() {
        let mut s = "ศไทย中华Việt Nam; foobar".to_string();
        assert_eq!(s.remove(0), 'ศ');
        assert_eq!(s.len(), 33);
        assert_eq!(s, "ไทย中华Việt Nam; foobar");
        assert_eq!(s.remove(17), 'ệ');
        assert_eq!(s, "ไทย中华Vit Nam; foobar");
    }

    // Index 1 falls inside the multi-byte character, so this must fail.
    #[test] #[should_fail]
    fn remove_bad() {
        "ศ".to_string().remove(1);
    }

    // `insert` takes a byte index.
    #[test]
    fn insert() {
        let mut s = "foobar".to_string();
        s.insert(0, 'ệ');
        assert_eq!(s, "ệfoobar");
        s.insert(6, 'ย');
        assert_eq!(s, "ệfooยbar");
    }

    // Inserting past the end, or inside a multi-byte character, must fail.
    #[test] #[should_fail] fn insert_bad1() { "".to_string().insert(1, 't'); }
    #[test] #[should_fail] fn insert_bad2() { "ệ".to_string().insert(1, 't'); }

    #[test]
    fn test_slicing() {
        let s = "foobar".to_string();
        assert_eq!("foobar", s[]);
        assert_eq!("foo", s[..3]);
        assert_eq!("bar", s[3..]);
        assert_eq!("oob", s[1..4]);
    }

    // `to_string` on primitive values.
    #[test]
    fn test_simple_types() {
        assert_eq!(1i.to_string(), "1");
        assert_eq!((-1i).to_string(), "-1");
        assert_eq!(200u.to_string(), "200");
        assert_eq!(2u8.to_string(), "2");
        assert_eq!(true.to_string(), "true");
        assert_eq!(false.to_string(), "false");
        assert_eq!(().to_string(), "()");
        assert_eq!(("hi".to_string()).to_string(), "hi");
    }

    // `to_string` on vectors, including nested ones.
    #[test]
    fn test_vectors() {
        let x: Vec<int> = vec![];
        assert_eq!(x.to_string(), "[]");
        assert_eq!((vec![1i]).to_string(), "[1]");
        assert_eq!((vec![1i, 2, 3]).to_string(), "[1, 2, 3]");
        assert!((vec![vec![], vec![1i], vec![1i, 1]]).to_string() ==
               "[[], [1], [1, 1]]");
    }

    // Collecting and extending from iterators of `char` and of `&str`.
    #[test]
    fn test_from_iterator() {
        let s = "ศไทย中华Việt Nam".to_string();
        let t = "ศไทย中华";
        let u = "Việt Nam";

        let a: String = s.chars().collect();
        assert_eq!(s, a);

        let mut b = t.to_string();
        b.extend(u.chars());
        assert_eq!(s, b);

        let c: String = vec![t, u].into_iter().collect();
        assert_eq!(s, c);

        let mut d = t.to_string();
        d.extend(vec![u].into_iter());
        assert_eq!(s, d);
    }

    #[bench]
    fn bench_with_capacity(b: &mut Bencher) {
        b.iter(|| {
            String::with_capacity(100)
        });
    }

    #[bench]
    fn bench_push_str(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        b.iter(|| {
            let mut r = String::new();
            r.push_str(s);
        });
    }

    // Number of pushes performed per iteration by the push benchmarks below.
    const REPETITIONS: u64 = 10_000;

    #[bench]
    fn bench_push_str_one_byte(b: &mut Bencher) {
        b.bytes = REPETITIONS;
        b.iter(|| {
            let mut r = String::new();
            for _ in range(0, REPETITIONS) {
                r.push_str("a")
            }
        });
    }

    #[bench]
    fn bench_push_char_one_byte(b: &mut Bencher) {
        b.bytes = REPETITIONS;
        b.iter(|| {
            let mut r = String::new();
            for _ in range(0, REPETITIONS) {
                r.push('a')
            }
        });
    }

    #[bench]
    fn bench_push_char_two_bytes(b: &mut Bencher) {
        b.bytes = REPETITIONS * 2;
        b.iter(|| {
            let mut r = String::new();
            for _ in range(0, REPETITIONS) {
                r.push('â')
            }
        });
    }

    #[bench]
    fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
        let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
                  Lorem ipsum dolor sit amet, consectetur. ";

        assert_eq!(100, s.len());
        b.iter(|| {
            let _ = String::from_utf8_lossy(s);
        });
    }

    #[bench]
    fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
        let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
        assert_eq!(100, s.len());
        b.iter(|| {
            let _ = String::from_utf8_lossy(s);
        });
    }

    #[bench]
    fn from_utf8_lossy_invalid(b: &mut Bencher) {
        let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
        b.iter(|| {
            let _ = String::from_utf8_lossy(s);
        });
    }

    #[bench]
    fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
        let s = repeat(0xf5u8).take(100).collect::<Vec<_>>();
        b.iter(|| {
            let _ = String::from_utf8_lossy(s.as_slice());
        });
    }
}