String.java 122.3 KB
Newer Older
D
duke 已提交
1
/*
2
 * Copyright (c) 1994, 2017, Oracle and/or its affiliates. All rights reserved.
D
duke 已提交
3 4 5 6
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
7
 * published by the Free Software Foundation.  Oracle designates this
D
duke 已提交
8
 * particular file as subject to the "Classpath" exception as provided
9
 * by Oracle in the LICENSE file that accompanied this code.
D
duke 已提交
10 11 12 13 14 15 16 17 18 19 20
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
21 22 23
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
D
duke 已提交
24 25 26 27 28 29
 */

package java.lang;

import java.io.ObjectStreamField;
import java.io.UnsupportedEncodingException;
30
import java.lang.annotation.Native;
D
duke 已提交
31 32 33 34 35 36
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Formatter;
import java.util.Locale;
37
import java.util.Objects;
38
import java.util.Spliterator;
39
import java.util.StringJoiner;
D
duke 已提交
40 41 42
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
43 44
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
45
import jdk.internal.HotSpotIntrinsicCandidate;
46
import jdk.internal.vm.annotation.Stable;
D
duke 已提交
47 48

/**
49 50
 * The {@code String} class represents character strings. All
 * string literals in Java programs, such as {@code "abc"}, are
D
duke 已提交
51 52 53 54 55
 * implemented as instances of this class.
 * <p>
 * Strings are constant; their values cannot be changed after they
 * are created. String buffers support mutable strings.
 * Because String objects are immutable they can be shared. For example:
R
rriggs 已提交
56
 * <blockquote><pre>
D
duke 已提交
57 58 59
 *     String str = "abc";
 * </pre></blockquote><p>
 * is equivalent to:
R
rriggs 已提交
60
 * <blockquote><pre>
D
duke 已提交
61 62 63 64
 *     char data[] = {'a', 'b', 'c'};
 *     String str = new String(data);
 * </pre></blockquote><p>
 * Here are some more examples of how strings can be used:
R
rriggs 已提交
65
 * <blockquote><pre>
D
duke 已提交
66 67 68 69 70 71 72
 *     System.out.println("abc");
 *     String cde = "cde";
 *     System.out.println("abc" + cde);
 *     String c = "abc".substring(2,3);
 *     String d = cde.substring(1, 2);
 * </pre></blockquote>
 * <p>
73
 * The class {@code String} includes methods for examining
D
duke 已提交
74 75 76 77 78 79 80 81
 * individual characters of the sequence, for comparing strings, for
 * searching strings, for extracting substrings, and for creating a
 * copy of a string with all characters translated to uppercase or to
 * lowercase. Case mapping is based on the Unicode Standard version
 * specified by the {@link java.lang.Character Character} class.
 * <p>
 * The Java language provides special support for the string
 * concatenation operator (&nbsp;+&nbsp;), and for conversion of
82 83
 * other objects to strings. For additional information on string
 * concatenation and conversion, see <i>The Java&trade; Language Specification</i>.
D
duke 已提交
84
 *
85
 * <p> Unless otherwise noted, passing a {@code null} argument to a constructor
D
duke 已提交
86 87 88
 * or method in this class will cause a {@link NullPointerException} to be
 * thrown.
 *
89
 * <p>A {@code String} represents a string in the UTF-16 format
D
duke 已提交
90 91
 * in which <em>supplementary characters</em> are represented by <em>surrogate
 * pairs</em> (see the section <a href="Character.html#unicode">Unicode
92
 * Character Representations</a> in the {@code Character} class for
D
duke 已提交
93
 * more information).
94 95 96
 * Index values refer to {@code char} code units, so a supplementary
 * character uses two positions in a {@code String}.
 * <p>The {@code String} class provides methods for dealing with
D
duke 已提交
97
 * Unicode code points (i.e., characters), in addition to those for
98
 * dealing with Unicode code units (i.e., {@code char} values).
D
duke 已提交
99
 *
100 101 102 103
 * <p>Unless otherwise noted, methods for comparing Strings do not take locale
 * into account.  The {@link java.text.Collator} class provides methods for
 * finer-grain, locale-sensitive String comparison.
 *
104 105 106 107 108 109 110 111
 * @implNote The implementation of the string concatenation operator is left to
 * the discretion of a Java compiler, as long as the compiler ultimately conforms
 * to <i>The Java&trade; Language Specification</i>. For example, the {@code javac} compiler
 * may implement the operator with {@code StringBuffer}, {@code StringBuilder},
 * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The
 * implementation of string conversion is typically through the method {@code toString},
 * defined by {@code Object} and inherited by all classes in Java.
 *
D
duke 已提交
112 113
 * @author  Lee Boynton
 * @author  Arthur van Hoff
114 115
 * @author  Martin Buchholz
 * @author  Ulf Zibis
D
duke 已提交
116 117 118 119
 * @see     java.lang.Object#toString()
 * @see     java.lang.StringBuffer
 * @see     java.lang.StringBuilder
 * @see     java.nio.charset.Charset
120
 * @since   1.0
121
 * @jls     15.18.1 String Concatenation Operator +
D
duke 已提交
122 123 124
 */

public final class String
125
    implements java.io.Serializable, Comparable<String>, CharSequence {
T
thartmann 已提交
126

127 128 129 130 131 132 133 134 135 136 137 138
    /**
     * The value is used for character storage.
     *
     * @implNote This field is trusted by the VM, and is subject to
     * constant folding if the String instance is constant. Overwriting this
     * field after construction will cause problems.
     *
     * Additionally, it is marked with {@link Stable} to trust the contents
     * of the array. No other facility in JDK provides this functionality (yet).
     * {@link Stable} is safe here, because value is never null.
     */
    @Stable
T
thartmann 已提交
139 140 141 142 143 144 145 146 147
    private final byte[] value;

    /**
     * The identifier of the encoding used to encode the bytes in
     * {@code value}. The supported values in this implementation are
     *
     * LATIN1
     * UTF16
     *
148 149 150
     * @implNote This field is trusted by the VM, and is subject to
     * constant folding if the String instance is constant. Overwriting this
     * field after construction will cause problems.
T
thartmann 已提交
151 152
     */
    private final byte coder;
D
duke 已提交
153 154 155 156 157 158 159

    /** Cache the hash code for the string */
    private int hash; // Default to 0

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    private static final long serialVersionUID = -6849794470754667710L;

T
thartmann 已提交
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
    /**
     * If String compaction is disabled, the bytes in {@code value} are
     * always encoded in UTF16.
     *
     * For methods with several possible implementation paths, when String
     * compaction is disabled, only one code path is taken.
     *
     * The instance field value is generally opaque to optimizing JIT
     * compilers. Therefore, in performance-sensitive places, an explicit
     * check of the static boolean {@code COMPACT_STRINGS} is done first
     * before checking the {@code coder} field since the static boolean
     * {@code COMPACT_STRINGS} would be constant folded away by an
     * optimizing JIT compiler. The idioms for these cases are as follows.
     *
     * For code such as:
     *
     *    if (coder == LATIN1) { ... }
     *
     * can be written more optimally as
     *
     *    if (coder() == LATIN1) { ... }
     *
     * or:
     *
     *    if (COMPACT_STRINGS && coder == LATIN1) { ... }
     *
     * An optimizing JIT compiler can fold the above conditional as:
     *
     *    COMPACT_STRINGS == true  => if (coder == LATIN1) { ... }
     *    COMPACT_STRINGS == false => if (false)           { ... }
     *
     * @implNote
     * The actual value for this field is injected by JVM. The static
     * initialization block is used to set the value here to communicate
     * that this static final field is not statically foldable, and to
     * avoid any possible circular dependency during vm initialization.
     */
    static final boolean COMPACT_STRINGS;

    static {
        // Assigned in a static initializer (rather than inline) so that the
        // field is not treated as a compile-time constant; per the @implNote
        // on this field, the JVM injects the actual value.
        COMPACT_STRINGS = true;
    }

D
duke 已提交
203 204 205
    /**
     * Class String is special cased within the Serialization Stream Protocol.
     *
206
     * A String instance is written into an ObjectOutputStream according to
R
rriggs 已提交
207
     * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements">
208
     * Object Serialization Specification, Section 6.2, "Stream Elements"</a>
D
duke 已提交
209 210
     */
    private static final ObjectStreamField[] serialPersistentFields =
211
        new ObjectStreamField[0];
D
duke 已提交
212 213 214 215 216 217 218

    /**
     * Initializes a newly created {@code String} object so that it represents
     * an empty character sequence.  Note that use of this constructor is
     * unnecessary since Strings are immutable.
     */
    public String() {
        // Reuse the storage and coder of the "" literal instead of
        // allocating a fresh empty array.
        this.value = "".value;
        this.coder = "".coder;
    }

    /**
     * Initializes a newly created {@code String} object so that it represents
     * the same sequence of characters as the argument; in other words, the
     * newly created string is a copy of the argument string. Unless an
     * explicit copy of {@code original} is needed, use of this constructor is
     * unnecessary since Strings are immutable.
     *
     * @param  original
     *         A {@code String}
     */
233
    @HotSpotIntrinsicCandidate
    public String(String original) {
        // The new instance references the same backing array as the
        // original; no bytes are copied here.
        this.value = original.value;
        this.coder = original.coder;
        // Carry over whatever hash code the original has cached so far.
        this.hash = original.hash;
    }

    /**
     * Allocates a new {@code String} so that it represents the sequence of
     * characters currently contained in the character array argument. The
     * contents of the character array are copied; subsequent modification of
     * the character array does not affect the newly created string.
     *
     * @param  value
     *         The initial value of the string
     */
    public String(char[] value) {
        // Whole-array case: delegate to the four-argument constructor
        // declared elsewhere in this file. The trailing null fills its last
        // parameter (presumably an overload-disambiguation marker; confirm
        // against that constructor).
        this(value, 0, value.length, null);
    }

    /**
     * Allocates a new {@code String} that contains characters from a subarray
     * of the character array argument. The {@code offset} argument is the
     * index of the first character of the subarray and the {@code count}
     * argument specifies the length of the subarray. The contents of the
     * subarray are copied; subsequent modification of the character array does
     * not affect the newly created string.
     *
     * @param  value
262
     *         Array that is the source of characters
D
duke 已提交
263 264 265 266 267 268 269 270
     *
     * @param  offset
     *         The initial offset
     *
     * @param  count
     *         The length
     *
     * @throws  IndexOutOfBoundsException
271 272
     *          If {@code offset} is negative, {@code count} is negative, or
     *          {@code offset} is greater than {@code value.length - count}
D
duke 已提交
273 274
     */
    public String(char[] value, int offset, int count) {
        // rangeCheck(...) is evaluated as an argument, so the bounds are
        // validated before the delegated constructor body runs; it always
        // yields null for the final parameter.
        this(value, offset, count, rangeCheck(value, offset, count));
    }

    private static Void rangeCheck(char[] value, int offset, int count) {
        checkBoundsOffCount(offset, count, value.length);
        return null;
D
duke 已提交
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305
    }

    /**
     * Allocates a new {@code String} that contains characters from a subarray
     * of the <a href="Character.html#unicode">Unicode code point</a> array
     * argument.  The {@code offset} argument is the index of the first code
     * point of the subarray and the {@code count} argument specifies the
     * length of the subarray.  The contents of the subarray are converted to
     * {@code char}s; subsequent modification of the {@code int} array does not
     * affect the newly created string.
     *
     * @param  codePoints
     *         Array that is the source of Unicode code points
     *
     * @param  offset
     *         The initial offset
     *
     * @param  count
     *         The length
     *
     * @throws  IllegalArgumentException
     *          If any invalid Unicode code point is found in {@code
     *          codePoints}
     *
     * @throws  IndexOutOfBoundsException
306 307
     *          If {@code offset} is negative, {@code count} is negative, or
     *          {@code offset} is greater than {@code codePoints.length - count}
D
duke 已提交
308 309 310 311
     *
     * @since  1.5
     */
    public String(int[] codePoints, int offset, int count) {
        checkBoundsOffCount(offset, count, codePoints.length);
        if (count == 0) {
            // Empty result: reuse the "" literal's storage and coder.
            this.value = "".value;
            this.coder = "".coder;
            return;
        }
        if (COMPACT_STRINGS) {
            // Try the one-byte-per-char form first. A null result is treated
            // as "not representable" (presumably some code point is outside
            // Latin-1; confirm in StringLatin1.toBytes) and falls through.
            byte[] val = StringLatin1.toBytes(codePoints, offset, count);
            if (val != null) {
                this.coder = LATIN1;
                this.value = val;
                return;
            }
        }
        this.coder = UTF16;
        this.value = StringUTF16.toBytes(codePoints, offset, count);
    }

    /**
     * Allocates a new {@code String} constructed from a subarray of an array
     * of 8-bit integer values.
     *
     * <p> The {@code offset} argument is the index of the first byte of the
     * subarray, and the {@code count} argument specifies the length of the
     * subarray.
     *
     * <p> Each {@code byte} in the subarray is converted to a {@code char} as
S
smarks 已提交
339
     * specified in the {@link #String(byte[],int) String(byte[],int)} constructor.
D
duke 已提交
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
     *
     * @deprecated This method does not properly convert bytes into characters.
     * As of JDK&nbsp;1.1, the preferred way to do this is via the
     * {@code String} constructors that take a {@link
     * java.nio.charset.Charset}, charset name, or that use the platform's
     * default charset.
     *
     * @param  ascii
     *         The bytes to be converted to characters
     *
     * @param  hibyte
     *         The top 8 bits of each 16-bit Unicode code unit
     *
     * @param  offset
     *         The initial offset
     * @param  count
     *         The length
     *
     * @throws  IndexOutOfBoundsException
359 360
     *          If {@code offset} is negative, {@code count} is negative, or
     *          {@code offset} is greater than {@code ascii.length - count}
D
duke 已提交
361 362 363 364 365 366 367 368 369
     *
     * @see  #String(byte[], int)
     * @see  #String(byte[], int, int, java.lang.String)
     * @see  #String(byte[], int, int, java.nio.charset.Charset)
     * @see  #String(byte[], int, int)
     * @see  #String(byte[], java.lang.String)
     * @see  #String(byte[], java.nio.charset.Charset)
     * @see  #String(byte[])
     */
370
    @Deprecated(since="1.1")
D
duke 已提交
371
    public String(byte ascii[], int hibyte, int offset, int count) {
T
thartmann 已提交
372 373 374 375 376 377 378 379 380
        checkBoundsOffCount(offset, count, ascii.length);
        if (count == 0) {
            this.value = "".value;
            this.coder = "".coder;
            return;
        }
        if (COMPACT_STRINGS && (byte)hibyte == 0) {
            this.value = Arrays.copyOfRange(ascii, offset, offset + count);
            this.coder = LATIN1;
D
duke 已提交
381 382
        } else {
            hibyte <<= 8;
T
thartmann 已提交
383 384 385
            byte[] val = StringUTF16.newBytesFor(count);
            for (int i = 0; i < count; i++) {
                StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff));
D
duke 已提交
386
            }
T
thartmann 已提交
387 388
            this.value = val;
            this.coder = UTF16;
D
duke 已提交
389 390 391 392 393
        }
    }

    /**
     * Allocates a new {@code String} containing characters constructed from
S
smarks 已提交
394
     * an array of 8-bit integer values. Each character <i>c</i> in the
D
duke 已提交
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421
     * resulting string is constructed from the corresponding component
     * <i>b</i> in the byte array such that:
     *
     * <blockquote><pre>
     *     <b><i>c</i></b> == (char)(((hibyte &amp; 0xff) &lt;&lt; 8)
     *                         | (<b><i>b</i></b> &amp; 0xff))
     * </pre></blockquote>
     *
     * @deprecated  This method does not properly convert bytes into
     * characters.  As of JDK&nbsp;1.1, the preferred way to do this is via the
     * {@code String} constructors that take a {@link
     * java.nio.charset.Charset}, charset name, or that use the platform's
     * default charset.
     *
     * @param  ascii
     *         The bytes to be converted to characters
     *
     * @param  hibyte
     *         The top 8 bits of each 16-bit Unicode code unit
     *
     * @see  #String(byte[], int, int, java.lang.String)
     * @see  #String(byte[], int, int, java.nio.charset.Charset)
     * @see  #String(byte[], int, int)
     * @see  #String(byte[], java.lang.String)
     * @see  #String(byte[], java.nio.charset.Charset)
     * @see  #String(byte[])
     */
422
    @Deprecated(since="1.1")
    public String(byte[] ascii, int hibyte) {
        // Whole-array form of the (ascii, hibyte, offset, count) constructor.
        this(ascii, hibyte, 0, ascii.length);
    }

    /**
     * Constructs a new {@code String} by decoding the specified subarray of
     * bytes using the specified charset.  The length of the new {@code String}
     * is a function of the charset, and hence may not be equal to the length
     * of the subarray.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the given charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  offset
     *         The index of the first byte to decode
     *
     * @param  length
     *         The number of bytes to decode
     *
     * @param  charsetName
     *         The name of a supported {@linkplain java.nio.charset.Charset
     *         charset}
     *
     * @throws  UnsupportedEncodingException
     *          If the named charset is not supported
     *
     * @throws  IndexOutOfBoundsException
455 456
     *          If {@code offset} is negative, {@code length} is negative, or
     *          {@code offset} is greater than {@code bytes.length - length}
D
duke 已提交
457
     *
458
     * @since  1.1
D
duke 已提交
459 460
     */
    public String(byte[] bytes, int offset, int length, String charsetName)
            throws UnsupportedEncodingException {
        // Null check comes first, so a null charset name is reported before
        // any bounds problem; the exception message is "charsetName".
        Objects.requireNonNull(charsetName, "charsetName");
        checkBoundsOffCount(offset, length, bytes.length);
        // The decoder returns both the bytes and the coder describing them.
        StringCoding.Result ret =
            StringCoding.decode(charsetName, bytes, offset, length);
        this.value = ret.value;
        this.coder = ret.coder;
    }

    /**
     * Constructs a new {@code String} by decoding the specified subarray of
     * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
     * The length of the new {@code String} is a function of the charset, and
     * hence may not be equal to the length of the subarray.
     *
     * <p> This method always replaces malformed-input and unmappable-character
     * sequences with this charset's default replacement string.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  offset
     *         The index of the first byte to decode
     *
     * @param  length
     *         The number of bytes to decode
     *
     * @param  charset
     *         The {@linkplain java.nio.charset.Charset charset} to be used to
     *         decode the {@code bytes}
     *
     * @throws  IndexOutOfBoundsException
496 497
     *          If {@code offset} is negative, {@code length} is negative, or
     *          {@code offset} is greater than {@code bytes.length - length}
D
duke 已提交
498 499 500 501 502 503
     *
     * @since  1.6
     */
    public String(byte[] bytes, int offset, int length, Charset charset) {
        // Null check comes first, so a null charset is reported before any
        // bounds problem; the exception message is "charset".
        Objects.requireNonNull(charset, "charset");
        checkBoundsOffCount(offset, length, bytes.length);
        // The decoder returns both the bytes and the coder describing them.
        StringCoding.Result ret =
            StringCoding.decode(charset, bytes, offset, length);
        this.value = ret.value;
        this.coder = ret.coder;
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes
     * using the specified {@linkplain java.nio.charset.Charset charset}.  The
     * length of the new {@code String} is a function of the charset, and hence
     * may not be equal to the length of the byte array.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the given charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  charsetName
     *         The name of a supported {@linkplain java.nio.charset.Charset
     *         charset}
     *
     * @throws  UnsupportedEncodingException
     *          If the named charset is not supported
     *
532
     * @since  1.1
D
duke 已提交
533 534
     */
    public String(byte[] bytes, String charsetName)
            throws UnsupportedEncodingException {
        // Whole-array form of the (bytes, offset, length, charsetName)
        // constructor.
        this(bytes, 0, bytes.length, charsetName);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of
     * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
     * The length of the new {@code String} is a function of the charset, and
     * hence may not be equal to the length of the byte array.
     *
     * <p> This method always replaces malformed-input and unmappable-character
     * sequences with this charset's default replacement string.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  charset
     *         The {@linkplain java.nio.charset.Charset charset} to be used to
     *         decode the {@code bytes}
     *
     * @since  1.6
     */
    public String(byte[] bytes, Charset charset) {
        // Whole-array form of the (bytes, offset, length, charset)
        // constructor.
        this(bytes, 0, bytes.length, charset);
    }

    /**
     * Constructs a new {@code String} by decoding the specified subarray of
     * bytes using the platform's default charset.  The length of the new
     * {@code String} is a function of the charset, and hence may not be equal
     * to the length of the subarray.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the default charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
     * @param  offset
     *         The index of the first byte to decode
     *
     * @param  length
     *         The number of bytes to decode
     *
     * @throws  IndexOutOfBoundsException
584 585
     *          If {@code offset} is negative, {@code length} is negative, or
     *          {@code offset} is greater than {@code bytes.length - length}
D
duke 已提交
586
     *
587
     * @since  1.1
D
duke 已提交
588 589
     */
    public String(byte[] bytes, int offset, int length) {
        checkBoundsOffCount(offset, length, bytes.length);
        // No charset argument: this is the decode overload used for the
        // platform default charset, as the constructor's Javadoc describes.
        StringCoding.Result ret = StringCoding.decode(bytes, offset, length);
        this.value = ret.value;
        this.coder = ret.coder;
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes
     * using the platform's default charset.  The length of the new {@code
     * String} is a function of the charset, and hence may not be equal to the
     * length of the byte array.
     *
     * <p> The behavior of this constructor when the given bytes are not valid
     * in the default charset is unspecified.  The {@link
     * java.nio.charset.CharsetDecoder} class should be used when more control
     * over the decoding process is required.
     *
     * @param  bytes
     *         The bytes to be decoded into characters
     *
610
     * @since  1.1
D
duke 已提交
611
     */
612
    public String(byte[] bytes) {
        // Whole-array form of the (bytes, offset, length) constructor.
        this(bytes, 0, bytes.length);
    }

    /**
     * Allocates a new string that contains the sequence of characters
     * currently contained in the string buffer argument. The contents of the
     * string buffer are copied; subsequent modification of the string buffer
     * does not affect the newly created string.
     *
     * @param  buffer
     *         A {@code StringBuffer}
     */
    public String(StringBuffer buffer) {
        // Snapshot the buffer via toString() and hand the result to the
        // String(String) constructor.
        this(buffer.toString());
    }

    /**
     * Allocates a new string that contains the sequence of characters
     * currently contained in the string builder argument. The contents of the
     * string builder are copied; subsequent modification of the string builder
     * does not affect the newly created string.
     *
     * <p> This constructor is provided to ease migration to {@code
     * StringBuilder}. Obtaining a string from a string builder via the {@code
     * toString} method is likely to run faster and is generally preferred.
     *
     * @param   builder
     *          A {@code StringBuilder}
     *
     * @since  1.5
     */
    public String(StringBuilder builder) {
        // Delegates to a two-argument constructor declared elsewhere in this
        // file; the null fills its second parameter (presumably an
        // overload-disambiguation marker; confirm against that constructor).
        this(builder, null);
    }

T
thartmann 已提交
648
   /*
    * Package private constructor which shares value array for speed.
    * This constructor is always expected to be called with share==true.
    * A separate constructor is needed because we already have a public
    * String(char[]) constructor that makes a copy of the given char[].
    */
T
thartmann 已提交
654 655 656
    // TBD: this is kept for package internal use (Thread/System),
    // should be removed if they all have a byte[] version
    String(char[] val, boolean share) {
        // assert share : "unshared not supported";
        // 'share' is not consulted: the call below is the same delegation the
        // public String(char[]) constructor uses.
        // NOTE(review): whether the array is actually shared depends on the
        // four-argument constructor, which is not shown here; confirm.
        this(val, 0, val.length, null);
    }

    /**
     * Returns the length of this string.
     * The length is equal to the number of <a href="Character.html#unicode">Unicode
     * code units</a> in the string.
     *
     * @return  the length of the sequence of characters represented by this
     *          object.
     */
    public int length() {
T
thartmann 已提交
670
        return value.length >> coder();
D
duke 已提交
671 672 673
    }

    /**
J
jgish 已提交
674
     * Returns {@code true} if, and only if, {@link #length()} is {@code 0}.
D
duke 已提交
675
     *
J
jgish 已提交
676 677
     * @return {@code true} if {@link #length()} is {@code 0}, otherwise
     * {@code false}
D
duke 已提交
678 679 680 681
     *
     * @since 1.6
     */
    public boolean isEmpty() {
682
        return value.length == 0;
D
duke 已提交
683 684 685
    }

    /**
686 687 688 689
     * Returns the {@code char} value at the
     * specified index. An index ranges from {@code 0} to
     * {@code length() - 1}. The first {@code char} value of the sequence
     * is at index {@code 0}, the next at index {@code 1},
D
duke 已提交
690 691
     * and so on, as for array indexing.
     *
692
     * <p>If the {@code char} value specified by the index is a
D
duke 已提交
693 694 695
     * <a href="Character.html#unicode">surrogate</a>, the surrogate
     * value is returned.
     *
696 697 698 699
     * @param      index   the index of the {@code char} value.
     * @return     the {@code char} value at the specified index of this string.
     *             The first {@code char} value is at index {@code 0}.
     * @exception  IndexOutOfBoundsException  if the {@code index}
D
duke 已提交
700 701 702 703
     *             argument is negative or not less than the length of this
     *             string.
     */
    public char charAt(int index) {
T
thartmann 已提交
704 705 706 707
        if (isLatin1()) {
            return StringLatin1.charAt(value, index);
        } else {
            return StringUTF16.charAt(value, index);
D
duke 已提交
708 709 710 711 712
        }
    }

    /**
     * Returns the character (Unicode code point) at the specified
713 714 715
     * index. The index refers to {@code char} values
     * (Unicode code units) and ranges from {@code 0} to
     * {@link #length()}{@code  - 1}.
D
duke 已提交
716
     *
717
     * <p> If the {@code char} value specified at the given index
D
duke 已提交
718
     * is in the high-surrogate range, the following index is less
719 720
     * than the length of this {@code String}, and the
     * {@code char} value at the following index is in the
D
duke 已提交
721 722
     * low-surrogate range, then the supplementary code point
     * corresponding to this surrogate pair is returned. Otherwise,
723
     * the {@code char} value at the given index is returned.
D
duke 已提交
724
     *
725
     * @param      index the index to the {@code char} values
D
duke 已提交
726
     * @return     the code point value of the character at the
727 728
     *             {@code index}
     * @exception  IndexOutOfBoundsException  if the {@code index}
D
duke 已提交
729 730 731 732 733
     *             argument is negative or not less than the length of this
     *             string.
     * @since      1.5
     */
    public int codePointAt(int index) {
T
thartmann 已提交
734 735 736
        if (isLatin1()) {
            checkIndex(index, value.length);
            return value[index] & 0xff;
D
duke 已提交
737
        }
T
thartmann 已提交
738 739 740
        int length = value.length >> 1;
        checkIndex(index, length);
        return StringUTF16.codePointAt(value, index, length);
D
duke 已提交
741 742 743 744
    }

    /**
     * Returns the character (Unicode code point) before the specified
745 746
     * index. The index refers to {@code char} values
     * (Unicode code units) and ranges from {@code 1} to {@link
D
duke 已提交
747 748
     * CharSequence#length() length}.
     *
749 750 751 752
     * <p> If the {@code char} value at {@code (index - 1)}
     * is in the low-surrogate range, {@code (index - 2)} is not
     * negative, and the {@code char} value at {@code (index -
     * 2)} is in the high-surrogate range, then the
D
duke 已提交
753
     * supplementary code point value of the surrogate pair is
754 755
     * returned. If the {@code char} value at {@code index -
     * 1} is an unpaired low-surrogate or a high-surrogate, the
D
duke 已提交
756 757 758 759
     * surrogate value is returned.
     *
     * @param     index the index following the code point that should be returned
     * @return    the Unicode code point value before the given index.
760
     * @exception IndexOutOfBoundsException if the {@code index}
D
duke 已提交
761 762 763 764 765 766
     *            argument is less than 1 or greater than the length
     *            of this string.
     * @since     1.5
     */
    public int codePointBefore(int index) {
        int i = index - 1;
T
thartmann 已提交
767
        if (i < 0 || i >= length()) {
D
duke 已提交
768 769
            throw new StringIndexOutOfBoundsException(index);
        }
T
thartmann 已提交
770 771 772 773
        if (isLatin1()) {
            return (value[i] & 0xff);
        }
        return StringUTF16.codePointBefore(value, index);
D
duke 已提交
774 775 776 777
    }

    /**
     * Returns the number of Unicode code points in the specified text
778 779 780 781 782
     * range of this {@code String}. The text range begins at the
     * specified {@code beginIndex} and extends to the
     * {@code char} at index {@code endIndex - 1}. Thus the
     * length (in {@code char}s) of the text range is
     * {@code endIndex-beginIndex}. Unpaired surrogates within
D
duke 已提交
783 784
     * the text range count as one code point each.
     *
785
     * @param beginIndex the index to the first {@code char} of
D
duke 已提交
786
     * the text range.
787
     * @param endIndex the index after the last {@code char} of
D
duke 已提交
788 789 790 791
     * the text range.
     * @return the number of Unicode code points in the specified text
     * range
     * @exception IndexOutOfBoundsException if the
792 793 794
     * {@code beginIndex} is negative, or {@code endIndex}
     * is larger than the length of this {@code String}, or
     * {@code beginIndex} is larger than {@code endIndex}.
D
duke 已提交
795 796 797
     * @since  1.5
     */
    public int codePointCount(int beginIndex, int endIndex) {
T
thartmann 已提交
798 799
        if (beginIndex < 0 || beginIndex > endIndex ||
            endIndex > length()) {
D
duke 已提交
800 801
            throw new IndexOutOfBoundsException();
        }
T
thartmann 已提交
802 803 804 805
        if (isLatin1()) {
            return endIndex - beginIndex;
        }
        return StringUTF16.codePointCount(value, beginIndex, endIndex);
D
duke 已提交
806 807 808
    }

    /**
809 810 811 812 813
     * Returns the index within this {@code String} that is
     * offset from the given {@code index} by
     * {@code codePointOffset} code points. Unpaired surrogates
     * within the text range given by {@code index} and
     * {@code codePointOffset} count as one code point each.
D
duke 已提交
814 815 816
     *
     * @param index the index to be offset
     * @param codePointOffset the offset in code points
817 818
     * @return the index within this {@code String}
     * @exception IndexOutOfBoundsException if {@code index}
D
duke 已提交
819
     *   is negative or larger than the length of this
820 821 822 823 824 825
     *   {@code String}, or if {@code codePointOffset} is positive
     *   and the substring starting with {@code index} has fewer
     *   than {@code codePointOffset} code points,
     *   or if {@code codePointOffset} is negative and the substring
     *   before {@code index} has fewer than the absolute value
     *   of {@code codePointOffset} code points.
D
duke 已提交
826 827 828
     * @since 1.5
     */
    public int offsetByCodePoints(int index, int codePointOffset) {
T
thartmann 已提交
829
        if (index < 0 || index > length()) {
D
duke 已提交
830 831
            throw new IndexOutOfBoundsException();
        }
T
thartmann 已提交
832
        return Character.offsetByCodePoints(this, index, codePointOffset);
D
duke 已提交
833 834 835 836 837 838
    }

    /**
     * Copies characters from this string into the destination character
     * array.
     * <p>
839 840
     * The first character to be copied is at index {@code srcBegin};
     * the last character to be copied is at index {@code srcEnd-1}
D
duke 已提交
841
     * (thus the total number of characters to be copied is
842 843
     * {@code srcEnd-srcBegin}). The characters are copied into the
     * subarray of {@code dst} starting at index {@code dstBegin}
D
duke 已提交
844
     * and ending at index:
R
rriggs 已提交
845
     * <blockquote><pre>
846
     *     dstBegin + (srcEnd-srcBegin) - 1
D
duke 已提交
847 848 849 850 851 852 853 854 855 856
     * </pre></blockquote>
     *
     * @param      srcBegin   index of the first character in the string
     *                        to copy.
     * @param      srcEnd     index after the last character in the string
     *                        to copy.
     * @param      dst        the destination array.
     * @param      dstBegin   the start offset in the destination array.
     * @exception IndexOutOfBoundsException If any of the following
     *            is true:
857 858 859
     *            <ul><li>{@code srcBegin} is negative.
     *            <li>{@code srcBegin} is greater than {@code srcEnd}
     *            <li>{@code srcEnd} is greater than the length of this
D
duke 已提交
860
     *                string
861 862 863
     *            <li>{@code dstBegin} is negative
     *            <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
     *                {@code dst.length}</ul>
D
duke 已提交
864 865
     */
    public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
T
thartmann 已提交
866 867 868 869 870 871
        checkBoundsBeginEnd(srcBegin, srcEnd, length());
        checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
        if (isLatin1()) {
            StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin);
        } else {
            StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin);
D
duke 已提交
872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887
        }
    }

    /**
     * Copies characters from this string into the destination byte array. Each
     * byte receives the 8 low-order bits of the corresponding character. The
     * eight high-order bits of each character are not copied and do not
     * participate in the transfer in any way.
     *
     * <p> The first character to be copied is at index {@code srcBegin}; the
     * last character to be copied is at index {@code srcEnd-1}.  The total
     * number of characters to be copied is {@code srcEnd-srcBegin}. The
     * characters, converted to bytes, are copied into the subarray of {@code
     * dst} starting at index {@code dstBegin} and ending at index:
     *
     * <blockquote><pre>
888
     *     dstBegin + (srcEnd-srcBegin) - 1
D
duke 已提交
889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917
     * </pre></blockquote>
     *
     * @deprecated  This method does not properly convert characters into
     * bytes.  As of JDK&nbsp;1.1, the preferred way to do this is via the
     * {@link #getBytes()} method, which uses the platform's default charset.
     *
     * @param  srcBegin
     *         Index of the first character in the string to copy
     *
     * @param  srcEnd
     *         Index after the last character in the string to copy
     *
     * @param  dst
     *         The destination array
     *
     * @param  dstBegin
     *         The start offset in the destination array
     *
     * @throws  IndexOutOfBoundsException
     *          If any of the following is true:
     *          <ul>
     *            <li> {@code srcBegin} is negative
     *            <li> {@code srcBegin} is greater than {@code srcEnd}
     *            <li> {@code srcEnd} is greater than the length of this String
     *            <li> {@code dstBegin} is negative
     *            <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
     *                 dst.length}
     *          </ul>
     */
918
    @Deprecated(since="1.1")
D
duke 已提交
919
    public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
T
thartmann 已提交
920
        checkBoundsBeginEnd(srcBegin, srcEnd, length());
921
        Objects.requireNonNull(dst);
T
thartmann 已提交
922 923 924 925 926
        checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length);
        if (isLatin1()) {
            StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
        } else {
            StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin);
D
duke 已提交
927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
        }
    }

    /**
     * Encodes this {@code String} into a sequence of bytes using the named
     * charset, storing the result into a new byte array.
     *
     * <p> The behavior of this method when this string cannot be encoded in
     * the given charset is unspecified.  The {@link
     * java.nio.charset.CharsetEncoder} class should be used when more control
     * over the encoding process is required.
     *
     * @param  charsetName
     *         The name of a supported {@linkplain java.nio.charset.Charset
     *         charset}
     *
     * @return  The resultant byte array
     *
     * @throws  UnsupportedEncodingException
     *          If the named charset is not supported
     *
948
     * @since  1.1
D
duke 已提交
949 950
     */
    public byte[] getBytes(String charsetName)
951
            throws UnsupportedEncodingException {
D
duke 已提交
952
        if (charsetName == null) throw new NullPointerException();
T
thartmann 已提交
953
        return StringCoding.encode(charsetName, coder(), value);
D
duke 已提交
954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975
    }

    /**
     * Encodes this {@code String} into a sequence of bytes using the given
     * {@linkplain java.nio.charset.Charset charset}, storing the result into a
     * new byte array.
     *
     * <p> This method always replaces malformed-input and unmappable-character
     * sequences with this charset's default replacement byte array.  The
     * {@link java.nio.charset.CharsetEncoder} class should be used when more
     * control over the encoding process is required.
     *
     * @param  charset
     *         The {@linkplain java.nio.charset.Charset} to be used to encode
     *         the {@code String}
     *
     * @return  The resultant byte array
     *
     * @since  1.6
     */
    public byte[] getBytes(Charset charset) {
        if (charset == null) throw new NullPointerException();
T
thartmann 已提交
976 977
        return StringCoding.encode(charset, coder(), value);
     }
D
duke 已提交
978 979 980 981 982 983 984 985 986 987 988 989

    /**
     * Encodes this {@code String} into a sequence of bytes using the
     * platform's default charset, storing the result into a new byte array.
     *
     * <p> The behavior of this method when this string cannot be encoded in
     * the default charset is unspecified.  The {@link
     * java.nio.charset.CharsetEncoder} class should be used when more control
     * over the encoding process is required.
     *
     * @return  The resultant byte array
     *
990
     * @since      1.1
D
duke 已提交
991 992
     */
    public byte[] getBytes() {
T
thartmann 已提交
993
        return StringCoding.encode(coder(), value);
D
duke 已提交
994 995 996 997 998 999 1000 1001
    }

    /**
     * Compares this string to the specified object.  The result is {@code
     * true} if and only if the argument is not {@code null} and is a {@code
     * String} object that represents the same sequence of characters as this
     * object.
     *
1002 1003 1004
     * <p>For finer-grained String comparison, refer to
     * {@link java.text.Collator}.
     *
D
duke 已提交
1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
     * @param  anObject
     *         The object to compare this {@code String} against
     *
     * @return  {@code true} if the given object represents a {@code String}
     *          equivalent to this string, {@code false} otherwise
     *
     * @see  #compareTo(String)
     * @see  #equalsIgnoreCase(String)
     */
    public boolean equals(Object anObject) {
        if (this == anObject) {
            return true;
        }
        if (anObject instanceof String) {
T
thartmann 已提交
1019 1020 1021 1022
            String aString = (String)anObject;
            if (coder() == aString.coder()) {
                return isLatin1() ? StringLatin1.equals(value, aString.value)
                                  : StringUTF16.equals(value, aString.value);
D
duke 已提交
1023 1024 1025 1026 1027 1028 1029 1030
            }
        }
        return false;
    }

    /**
     * Compares this string to the specified {@code StringBuffer}.  The result
     * is {@code true} if and only if this {@code String} represents the same
1031 1032
     * sequence of characters as the specified {@code StringBuffer}. This method
     * synchronizes on the {@code StringBuffer}.
D
duke 已提交
1033
     *
1034 1035 1036
     * <p>For finer-grained String comparison, refer to
     * {@link java.text.Collator}.
     *
D
duke 已提交
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
     * @param  sb
     *         The {@code StringBuffer} to compare this {@code String} against
     *
     * @return  {@code true} if this {@code String} represents the same
     *          sequence of characters as the specified {@code StringBuffer},
     *          {@code false} otherwise
     *
     * @since  1.4
     */
    public boolean contentEquals(StringBuffer sb) {
1047
        return contentEquals((CharSequence)sb);
1048 1049 1050
    }

    private boolean nonSyncContentEquals(AbstractStringBuilder sb) {
T
thartmann 已提交
1051 1052
        int len = length();
        if (len != sb.length()) {
1053 1054
            return false;
        }
T
thartmann 已提交
1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065
        byte v1[] = value;
        byte v2[] = sb.getValue();
        if (coder() == sb.getCoder()) {
            int n = v1.length;
            for (int i = 0; i < n; i++) {
                if (v1[i] != v2[i]) {
                    return false;
                }
            }
        } else {
            if (!isLatin1()) {  // utf16 str and latin1 abs can never be "equal"
1066 1067
                return false;
            }
1068
            return StringUTF16.contentEquals(v1, v2, len);
D
duke 已提交
1069
        }
1070
        return true;
D
duke 已提交
1071 1072 1073
    }

    /**
1074 1075 1076 1077 1078
     * Compares this string to the specified {@code CharSequence}.  The
     * result is {@code true} if and only if this {@code String} represents the
     * same sequence of char values as the specified sequence. Note that if the
     * {@code CharSequence} is a {@code StringBuffer} then the method
     * synchronizes on it.
D
duke 已提交
1079
     *
1080 1081 1082
     * <p>For finer-grained String comparison, refer to
     * {@link java.text.Collator}.
     *
D
duke 已提交
1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
     * @param  cs
     *         The sequence to compare this {@code String} against
     *
     * @return  {@code true} if this {@code String} represents the same
     *          sequence of char values as the specified sequence, {@code
     *          false} otherwise
     *
     * @since  1.5
     */
    public boolean contentEquals(CharSequence cs) {
        // Argument is a StringBuffer, StringBuilder
        if (cs instanceof AbstractStringBuilder) {
1095 1096 1097 1098 1099 1100
            if (cs instanceof StringBuffer) {
                synchronized(cs) {
                   return nonSyncContentEquals((AbstractStringBuilder)cs);
                }
            } else {
                return nonSyncContentEquals((AbstractStringBuilder)cs);
D
duke 已提交
1101 1102 1103
            }
        }
        // Argument is a String
1104 1105 1106
        if (cs instanceof String) {
            return equals(cs);
        }
D
duke 已提交
1107
        // Argument is a generic CharSequence
T
thartmann 已提交
1108 1109
        int n = cs.length();
        if (n != length()) {
1110 1111
            return false;
        }
T
thartmann 已提交
1112 1113 1114 1115 1116 1117 1118 1119
        byte[] val = this.value;
        if (isLatin1()) {
            for (int i = 0; i < n; i++) {
                if ((val[i] & 0xff) != cs.charAt(i)) {
                    return false;
                }
            }
        } else {
1120 1121
            if (!StringUTF16.contentEquals(val, cs, n)) {
                return false;
1122
            }
D
duke 已提交
1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137
        }
        return true;
    }

    /**
     * Compares this {@code String} to another {@code String}, ignoring case
     * considerations.  Two strings are considered equal ignoring case if they
     * are of the same length and corresponding characters in the two strings
     * are equal ignoring case.
     *
     * <p> Two characters {@code c1} and {@code c2} are considered the same
     * ignoring case if at least one of the following is true:
     * <ul>
     *   <li> The two characters are the same (as compared by the
     *        {@code ==} operator)
1138 1139
     *   <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))}
     *        on each character produces the same result
D
duke 已提交
1140 1141
     * </ul>
     *
1142 1143 1144 1145
     * <p>Note that this method does <em>not</em> take locale into account, and
     * will result in unsatisfactory results for certain locales.  The
     * {@link java.text.Collator} class provides locale-sensitive comparison.
     *
D
duke 已提交
1146 1147 1148 1149 1150 1151 1152 1153 1154 1155
     * @param  anotherString
     *         The {@code String} to compare this {@code String} against
     *
     * @return  {@code true} if the argument is not {@code null} and it
     *          represents an equivalent {@code String} ignoring case; {@code
     *          false} otherwise
     *
     * @see  #equals(Object)
     */
    public boolean equalsIgnoreCase(String anotherString) {
1156 1157
        return (this == anotherString) ? true
                : (anotherString != null)
T
thartmann 已提交
1158 1159
                && (anotherString.length() == length())
                && regionMatches(true, 0, anotherString, 0, length());
D
duke 已提交
1160 1161 1162 1163 1164 1165
    }

    /**
     * Compares two strings lexicographically.
     * The comparison is based on the Unicode value of each character in
     * the strings. The character sequence represented by this
1166
     * {@code String} object is compared lexicographically to the
D
duke 已提交
1167
     * character sequence represented by the argument string. The result is
1168
     * a negative integer if this {@code String} object
D
duke 已提交
1169
     * lexicographically precedes the argument string. The result is a
1170
     * positive integer if this {@code String} object lexicographically
D
duke 已提交
1171
     * follows the argument string. The result is zero if the strings
1172 1173
     * are equal; {@code compareTo} returns {@code 0} exactly when
     * the {@link #equals(Object)} method would return {@code true}.
D
duke 已提交
1174 1175 1176 1177 1178 1179 1180
     * <p>
     * This is the definition of lexicographic ordering. If two strings are
     * different, then either they have different characters at some index
     * that is a valid index for both strings, or their lengths are different,
     * or both. If they have different characters at one or more index
     * positions, let <i>k</i> be the smallest such index; then the string
     * whose character at position <i>k</i> has the smaller value, as
1181
     * determined by using the {@code <} operator, lexicographically precedes the
1182 1183
     * other string. In this case, {@code compareTo} returns the
     * difference of the two character values at position {@code k} in
D
duke 已提交
1184 1185 1186 1187 1188 1189
     * the two strings -- that is, the value:
     * <blockquote><pre>
     * this.charAt(k)-anotherString.charAt(k)
     * </pre></blockquote>
     * If there is no index position at which they differ, then the shorter
     * string lexicographically precedes the longer string. In this case,
1190
     * {@code compareTo} returns the difference of the lengths of the
D
duke 已提交
1191 1192 1193 1194 1195
     * strings -- that is, the value:
     * <blockquote><pre>
     * this.length()-anotherString.length()
     * </pre></blockquote>
     *
1196 1197 1198
     * <p>For finer-grained String comparison, refer to
     * {@link java.text.Collator}.
     *
1199 1200 1201
     * @param   anotherString   the {@code String} to be compared.
     * @return  the value {@code 0} if the argument string is equal to
     *          this string; a value less than {@code 0} if this string
D
duke 已提交
1202
     *          is lexicographically less than the string argument; and a
1203
     *          value greater than {@code 0} if this string is
D
duke 已提交
1204 1205 1206
     *          lexicographically greater than the string argument.
     */
    public int compareTo(String anotherString) {
T
thartmann 已提交
1207 1208 1209 1210 1211
        byte v1[] = value;
        byte v2[] = anotherString.value;
        if (coder() == anotherString.coder()) {
            return isLatin1() ? StringLatin1.compareTo(v1, v2)
                              : StringUTF16.compareTo(v1, v2);
D
duke 已提交
1212
        }
T
thartmann 已提交
1213 1214 1215
        return isLatin1() ? StringLatin1.compareToUTF16(v1, v2)
                          : StringUTF16.compareToLatin1(v1, v2);
     }
D
duke 已提交
1216 1217

    /**
1218 1219
     * A Comparator that orders {@code String} objects as by
     * {@code compareToIgnoreCase}. This comparator is serializable.
D
duke 已提交
1220 1221 1222
     * <p>
     * Note that this Comparator does <em>not</em> take locale into account,
     * and will result in an unsatisfactory ordering for certain locales.
1223
     * The {@link java.text.Collator} class provides locale-sensitive comparison.
D
duke 已提交
1224
     *
1225
     * @see     java.text.Collator
D
duke 已提交
1226 1227 1228 1229 1230
     * @since   1.2
     */
    public static final Comparator<String> CASE_INSENSITIVE_ORDER
                                         = new CaseInsensitiveComparator();
    private static class CaseInsensitiveComparator
1231
            implements Comparator<String>, java.io.Serializable {
D
duke 已提交
1232 1233 1234 1235
        // use serialVersionUID from JDK 1.2.2 for interoperability
        private static final long serialVersionUID = 8575799808933029326L;

        public int compare(String s1, String s2) {
T
thartmann 已提交
1236 1237
            byte v1[] = s1.value;
            byte v2[] = s2.value;
1238 1239 1240
            if (s1.coder() == s2.coder()) {
                return s1.isLatin1() ? StringLatin1.compareToCI(v1, v2)
                                     : StringUTF16.compareToCI(v1, v2);
D
duke 已提交
1241
            }
1242 1243
            return s1.isLatin1() ? StringLatin1.compareToCI_UTF16(v1, v2)
                                 : StringUTF16.compareToCI_Latin1(v1, v2);
D
duke 已提交
1244
        }
1245 1246 1247

        /** Replaces the de-serialized object. */
        private Object readResolve() { return CASE_INSENSITIVE_ORDER; }
D
duke 已提交
1248 1249 1250 1251 1252
    }

    /**
     * Compares two strings lexicographically, ignoring case
     * differences. This method returns an integer whose sign is that of
1253
     * calling {@code compareTo} with normalized versions of the strings
D
duke 已提交
1254
     * where case differences have been eliminated by calling
1255
     * {@code Character.toLowerCase(Character.toUpperCase(character))} on
D
duke 已提交
1256 1257 1258 1259
     * each character.
     * <p>
     * Note that this method does <em>not</em> take locale into account,
     * and will result in an unsatisfactory ordering for certain locales.
1260
     * The {@link java.text.Collator} class provides locale-sensitive comparison.
D
duke 已提交
1261
     *
1262
     * @param   str   the {@code String} to be compared.
D
duke 已提交
1263 1264 1265
     * @return  a negative integer, zero, or a positive integer as this
     *          String is less than, equal to, or greater than the
     *          specified String, ignoring case considerations.
1266
     * @see     java.text.Collator
D
duke 已提交
1267 1268 1269 1270 1271 1272 1273 1274 1275
     * @since   1.2
     */
    public int compareToIgnoreCase(String str) {
        // Delegates to the shared comparator instance, so the ordering is
        // the one defined by CASE_INSENSITIVE_ORDER.
        final Comparator<String> order = CASE_INSENSITIVE_ORDER;
        return order.compare(this, str);
    }

    /**
     * Tests if two string regions are equal.
     * <p>
J
jgish 已提交
1276
     * A substring of this {@code String} object is compared to a substring
D
duke 已提交
1277 1278
     * of the argument other. The result is true if these substrings
     * represent identical character sequences. The substring of this
J
jgish 已提交
1279 1280 1281 1282
     * {@code String} object to be compared begins at index {@code toffset}
     * and has length {@code len}. The substring of other to be compared
     * begins at index {@code ooffset} and has length {@code len}. The
     * result is {@code false} if and only if at least one of the following
D
duke 已提交
1283
     * is true:
J
jgish 已提交
1284 1285 1286 1287 1288
     * <ul><li>{@code toffset} is negative.
     * <li>{@code ooffset} is negative.
     * <li>{@code toffset+len} is greater than the length of this
     * {@code String} object.
     * <li>{@code ooffset+len} is greater than the length of the other
D
duke 已提交
1289
     * argument.
J
jgish 已提交
1290
     * <li>There is some nonnegative integer <i>k</i> less than {@code len}
D
duke 已提交
1291
     * such that:
1292 1293
     * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + }
     * <i>k</i>{@code )}
D
duke 已提交
1294 1295
     * </ul>
     *
1296 1297 1298
     * <p>Note that this method does <em>not</em> take locale into account.  The
     * {@link java.text.Collator} class provides locale-sensitive comparison.
     *
D
duke 已提交
1299 1300 1301 1302 1303
     * @param   toffset   the starting offset of the subregion in this string.
     * @param   other     the string argument.
     * @param   ooffset   the starting offset of the subregion in the string
     *                    argument.
     * @param   len       the number of characters to compare.
1304
     * @return  {@code true} if the specified subregion of this string
D
duke 已提交
1305
     *          exactly matches the specified subregion of the string argument;
1306
     *          {@code false} otherwise.
D
duke 已提交
1307
     */
T
thartmann 已提交
1308 1309 1310
    public boolean regionMatches(int toffset, String other, int ooffset, int len) {
        byte tv[] = value;
        byte ov[] = other.value;
D
duke 已提交
1311
        // Note: toffset, ooffset, or len might be near -1>>>1.
T
thartmann 已提交
1312 1313 1314
        if ((ooffset < 0) || (toffset < 0) ||
             (toffset > (long)length() - len) ||
             (ooffset > (long)other.length() - len)) {
D
duke 已提交
1315 1316
            return false;
        }
T
thartmann 已提交
1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342
        if (coder() == other.coder()) {
            if (!isLatin1() && (len > 0)) {
                toffset = toffset << 1;
                ooffset = ooffset << 1;
                len = len << 1;
            }
            while (len-- > 0) {
                if (tv[toffset++] != ov[ooffset++]) {
                    return false;
                }
            }
        } else {
            if (coder() == LATIN1) {
                while (len-- > 0) {
                    if (StringLatin1.getChar(tv, toffset++) !=
                        StringUTF16.getChar(ov, ooffset++)) {
                        return false;
                    }
                }
            } else {
                while (len-- > 0) {
                    if (StringUTF16.getChar(tv, toffset++) !=
                        StringLatin1.getChar(ov, ooffset++)) {
                        return false;
                    }
                }
D
duke 已提交
1343 1344 1345 1346 1347 1348 1349 1350
            }
        }
        return true;
    }

    /**
     * Tests if two string regions are equal.
     * <p>
J
jgish 已提交
1351 1352
     * A substring of this {@code String} object is compared to a substring
     * of the argument {@code other}. The result is {@code true} if these
D
duke 已提交
1353
     * substrings represent character sequences that are the same, ignoring
J
jgish 已提交
1354 1355 1356 1357 1358
     * case if and only if {@code ignoreCase} is true. The substring of
     * this {@code String} object to be compared begins at index
     * {@code toffset} and has length {@code len}. The substring of
     * {@code other} to be compared begins at index {@code ooffset} and
     * has length {@code len}. The result is {@code false} if and only if
D
duke 已提交
1359
     * at least one of the following is true:
J
jgish 已提交
1360 1361 1362 1363 1364
     * <ul><li>{@code toffset} is negative.
     * <li>{@code ooffset} is negative.
     * <li>{@code toffset+len} is greater than the length of this
     * {@code String} object.
     * <li>{@code ooffset+len} is greater than the length of the other
D
duke 已提交
1365
     * argument.
J
jgish 已提交
1366 1367
     * <li>{@code ignoreCase} is {@code false} and there is some nonnegative
     * integer <i>k</i> less than {@code len} such that:
D
duke 已提交
1368 1369 1370
     * <blockquote><pre>
     * this.charAt(toffset+k) != other.charAt(ooffset+k)
     * </pre></blockquote>
J
jgish 已提交
1371 1372
     * <li>{@code ignoreCase} is {@code true} and there is some nonnegative
     * integer <i>k</i> less than {@code len} such that:
D
duke 已提交
1373
     * <blockquote><pre>
1374 1375
     * Character.toLowerCase(Character.toUpperCase(this.charAt(toffset+k))) !=
     *     Character.toLowerCase(Character.toUpperCase(other.charAt(ooffset+k)))
D
duke 已提交
1376 1377 1378
     * </pre></blockquote>
     * </ul>
     *
1379 1380 1381 1382 1383
     * <p>Note that this method does <em>not</em> take locale into account,
     * and will result in unsatisfactory results for certain locales when
     * {@code ignoreCase} is {@code true}.  The {@link java.text.Collator} class
     * provides locale-sensitive comparison.
     *
1384
     * @param   ignoreCase   if {@code true}, ignore case when comparing
D
duke 已提交
1385 1386 1387 1388 1389 1390 1391
     *                       characters.
     * @param   toffset      the starting offset of the subregion in this
     *                       string.
     * @param   other        the string argument.
     * @param   ooffset      the starting offset of the subregion in the string
     *                       argument.
     * @param   len          the number of characters to compare.
1392
     * @return  {@code true} if the specified subregion of this string
D
duke 已提交
1393
     *          matches the specified subregion of the string argument;
1394 1395
     *          {@code false} otherwise. Whether the matching is exact
     *          or case insensitive depends on the {@code ignoreCase}
D
duke 已提交
1396 1397 1398
     *          argument.
     */
    public boolean regionMatches(boolean ignoreCase, int toffset,
1399
            String other, int ooffset, int len) {
T
thartmann 已提交
1400 1401 1402
        if (!ignoreCase) {
            return regionMatches(toffset, other, ooffset, len);
        }
D
duke 已提交
1403
        // Note: toffset, ooffset, or len might be near -1>>>1.
1404
        if ((ooffset < 0) || (toffset < 0)
T
thartmann 已提交
1405 1406
                || (toffset > (long)length() - len)
                || (ooffset > (long)other.length() - len)) {
D
duke 已提交
1407 1408
            return false;
        }
T
thartmann 已提交
1409 1410 1411 1412 1413 1414
        byte tv[] = value;
        byte ov[] = other.value;
        if (coder() == other.coder()) {
            return isLatin1()
              ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len)
              : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len);
D
duke 已提交
1415
        }
T
thartmann 已提交
1416 1417 1418
        return isLatin1()
              ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len)
              : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len);
D
duke 已提交
1419 1420 1421 1422 1423 1424 1425 1426
    }

    /**
     * Tests if the substring of this string beginning at the
     * specified index starts with the specified prefix.
     *
     * @param   prefix    the prefix.
     * @param   toffset   where to begin looking in this string.
1427
     * @return  {@code true} if the character sequence represented by the
D
duke 已提交
1428
     *          argument is a prefix of the substring of this object starting
1429 1430
     *          at index {@code toffset}; {@code false} otherwise.
     *          The result is {@code false} if {@code toffset} is
D
duke 已提交
1431
     *          negative or greater than the length of this
1432
     *          {@code String} object; otherwise the result is the same
D
duke 已提交
1433 1434 1435 1436 1437 1438 1439
     *          as the result of the expression
     *          <pre>
     *          this.substring(toffset).startsWith(prefix)
     *          </pre>
     */
    public boolean startsWith(String prefix, int toffset) {
        // Note: toffset might be near -1>>>1.
T
thartmann 已提交
1440
        if (toffset < 0 || toffset > length() - prefix.length()) {
D
duke 已提交
1441 1442
            return false;
        }
T
thartmann 已提交
1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455
        byte ta[] = value;
        byte pa[] = prefix.value;
        int po = 0;
        int pc = pa.length;
        if (coder() == prefix.coder()) {
            int to = isLatin1() ? toffset : toffset << 1;
            while (po < pc) {
                if (ta[to++] != pa[po++]) {
                    return false;
                }
            }
        } else {
            if (isLatin1()) {  // && pcoder == UTF16
D
duke 已提交
1456 1457
                return false;
            }
T
thartmann 已提交
1458 1459 1460 1461 1462 1463
            // coder == UTF16 && pcoder == LATIN1)
            while (po < pc) {
                if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
                    return false;
               }
            }
D
duke 已提交
1464 1465 1466 1467 1468 1469 1470 1471
        }
        return true;
    }

    /**
     * Tests if this string starts with the specified prefix.
     *
     * @param   prefix   the prefix.
1472
     * @return  {@code true} if the character sequence represented by the
D
duke 已提交
1473
     *          argument is a prefix of the character sequence represented by
1474 1475
     *          this string; {@code false} otherwise.
     *          Note also that {@code true} will be returned if the
D
duke 已提交
1476
     *          argument is an empty string or is equal to this
1477
     *          {@code String} object as determined by the
D
duke 已提交
1478
     *          {@link #equals(Object)} method.
1479
     * @since   1.0
D
duke 已提交
1480 1481 1482 1483 1484 1485 1486 1487 1488
     */
    public boolean startsWith(String prefix) {
        // Delegates to the offset-taking overload with offset 0.
        return startsWith(prefix, 0);
    }

    /**
     * Tests if this string ends with the specified suffix.
     *
     * @param   suffix   the suffix.
1489
     * @return  {@code true} if the character sequence represented by the
D
duke 已提交
1490
     *          argument is a suffix of the character sequence represented by
1491 1492 1493
     *          this object; {@code false} otherwise. Note that the
     *          result will be {@code true} if the argument is the
     *          empty string or is equal to this {@code String} object
D
duke 已提交
1494 1495 1496
     *          as determined by the {@link #equals(Object)} method.
     */
    public boolean endsWith(String suffix) {
T
thartmann 已提交
1497
        return startsWith(suffix, length() - suffix.length());
D
duke 已提交
1498 1499 1500 1501
    }

    /**
     * Returns a hash code for this string. The hash code for a
1502
     * {@code String} object is computed as
D
duke 已提交
1503 1504 1505
     * <blockquote><pre>
     * s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
     * </pre></blockquote>
1506 1507 1508
     * using {@code int} arithmetic, where {@code s[i]} is the
     * <i>i</i>th character of the string, {@code n} is the length of
     * the string, and {@code ^} indicates exponentiation.
D
duke 已提交
1509 1510 1511 1512 1513
     * (The hash value of the empty string is zero.)
     *
     * @return  a hash code value for this object.
     */
    public int hashCode() {
1514 1515 1516 1517
        int h = hash;
        if (h == 0 && value.length > 0) {
            hash = h = isLatin1() ? StringLatin1.hashCode(value)
                                  : StringUTF16.hashCode(value);
D
duke 已提交
1518
        }
1519
        return h;
D
duke 已提交
1520 1521 1522 1523 1524
    }

    /**
     * Returns the index within this string of the first occurrence of
     * the specified character. If a character with value
1525 1526
     * {@code ch} occurs in the character sequence represented by
     * this {@code String} object, then the index (in Unicode
D
duke 已提交
1527
     * code units) of the first such occurrence is returned. For
1528
     * values of {@code ch} in the range from 0 to 0xFFFF
D
duke 已提交
1529 1530 1531 1532
     * (inclusive), this is the smallest value <i>k</i> such that:
     * <blockquote><pre>
     * this.charAt(<i>k</i>) == ch
     * </pre></blockquote>
1533
     * is true. For other values of {@code ch}, it is the
D
duke 已提交
1534 1535 1536 1537 1538
     * smallest value <i>k</i> such that:
     * <blockquote><pre>
     * this.codePointAt(<i>k</i>) == ch
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in this
1539
     * string, then {@code -1} is returned.
D
duke 已提交
1540 1541 1542 1543
     *
     * @param   ch   a character (Unicode code point).
     * @return  the index of the first occurrence of the character in the
     *          character sequence represented by this object, or
1544
     *          {@code -1} if the character does not occur.
D
duke 已提交
1545 1546 1547 1548 1549 1550 1551 1552 1553
     */
    public int indexOf(int ch) {
        // Delegates to indexOf(int, int), searching from index 0.
        return indexOf(ch, 0);
    }

    /**
     * Returns the index within this string of the first occurrence of the
     * specified character, starting the search at the specified index.
     * <p>
1554 1555 1556
     * If a character with value {@code ch} occurs in the
     * character sequence represented by this {@code String}
     * object at an index no smaller than {@code fromIndex}, then
D
duke 已提交
1557
     * the index of the first such occurrence is returned. For values
1558
     * of {@code ch} in the range from 0 to 0xFFFF (inclusive),
D
duke 已提交
1559 1560
     * this is the smallest value <i>k</i> such that:
     * <blockquote><pre>
J
jgish 已提交
1561
     * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
D
duke 已提交
1562
     * </pre></blockquote>
1563
     * is true. For other values of {@code ch}, it is the
D
duke 已提交
1564 1565
     * smallest value <i>k</i> such that:
     * <blockquote><pre>
J
jgish 已提交
1566
     * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &gt;= fromIndex)
D
duke 已提交
1567 1568
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in this
1569 1570
     * string at or after position {@code fromIndex}, then
     * {@code -1} is returned.
D
duke 已提交
1571 1572
     *
     * <p>
1573
     * There is no restriction on the value of {@code fromIndex}. If it
D
duke 已提交
1574 1575 1576
     * is negative, it has the same effect as if it were zero: this entire
     * string may be searched. If it is greater than the length of this
     * string, it has the same effect as if it were equal to the length of
1577
     * this string: {@code -1} is returned.
D
duke 已提交
1578
     *
1579
     * <p>All indices are specified in {@code char} values
D
duke 已提交
1580 1581 1582 1583 1584 1585
     * (Unicode code units).
     *
     * @param   ch          a character (Unicode code point).
     * @param   fromIndex   the index to start the search from.
     * @return  the index of the first occurrence of the character in the
     *          character sequence represented by this object that is greater
1586
     *          than or equal to {@code fromIndex}, or {@code -1}
D
duke 已提交
1587 1588 1589
     *          if the character does not occur.
     */
    public int indexOf(int ch, int fromIndex) {
T
thartmann 已提交
1590 1591
        return isLatin1() ? StringLatin1.indexOf(value, ch, fromIndex)
                          : StringUTF16.indexOf(value, ch, fromIndex);
D
duke 已提交
1592 1593 1594 1595
    }

    /**
     * Returns the index within this string of the last occurrence of
1596
     * the specified character. For values of {@code ch} in the
D
duke 已提交
1597 1598 1599 1600 1601
     * range from 0 to 0xFFFF (inclusive), the index (in Unicode code
     * units) returned is the largest value <i>k</i> such that:
     * <blockquote><pre>
     * this.charAt(<i>k</i>) == ch
     * </pre></blockquote>
1602
     * is true. For other values of {@code ch}, it is the
D
duke 已提交
1603 1604 1605 1606 1607
     * largest value <i>k</i> such that:
     * <blockquote><pre>
     * this.codePointAt(<i>k</i>) == ch
     * </pre></blockquote>
     * is true.  In either case, if no such character occurs in this
1608 1609
     * string, then {@code -1} is returned.  The
     * {@code String} is searched backwards starting at the last
D
duke 已提交
1610 1611 1612 1613 1614
     * character.
     *
     * @param   ch   a character (Unicode code point).
     * @return  the index of the last occurrence of the character in the
     *          character sequence represented by this object, or
1615
     *          {@code -1} if the character does not occur.
D
duke 已提交
1616 1617
     */
    public int lastIndexOf(int ch) {
T
thartmann 已提交
1618
        return lastIndexOf(ch, length() - 1);
D
duke 已提交
1619 1620 1621 1622 1623
    }

    /**
     * Returns the index within this string of the last occurrence of
     * the specified character, searching backward starting at the
1624
     * specified index. For values of {@code ch} in the range
D
duke 已提交
1625 1626 1627
     * from 0 to 0xFFFF (inclusive), the index returned is the largest
     * value <i>k</i> such that:
     * <blockquote><pre>
J
jgish 已提交
1628
     * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
D
duke 已提交
1629
     * </pre></blockquote>
1630
     * is true. For other values of {@code ch}, it is the
D
duke 已提交
1631 1632
     * largest value <i>k</i> such that:
     * <blockquote><pre>
J
jgish 已提交
1633
     * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> &lt;= fromIndex)
D
duke 已提交
1634 1635
     * </pre></blockquote>
     * is true. In either case, if no such character occurs in this
1636 1637
     * string at or before position {@code fromIndex}, then
     * {@code -1} is returned.
D
duke 已提交
1638
     *
1639
     * <p>All indices are specified in {@code char} values
D
duke 已提交
1640 1641 1642 1643
     * (Unicode code units).
     *
     * @param   ch          a character (Unicode code point).
     * @param   fromIndex   the index to start the search from. There is no
1644
     *          restriction on the value of {@code fromIndex}. If it is
D
duke 已提交
1645 1646 1647 1648 1649 1650 1651
     *          greater than or equal to the length of this string, it has
     *          the same effect as if it were equal to one less than the
     *          length of this string: this entire string may be searched.
     *          If it is negative, it has the same effect as if it were -1:
     *          -1 is returned.
     * @return  the index of the last occurrence of the character in the
     *          character sequence represented by this object that is less
1652
     *          than or equal to {@code fromIndex}, or {@code -1}
D
duke 已提交
1653 1654 1655
     *          if the character does not occur before that point.
     */
    public int lastIndexOf(int ch, int fromIndex) {
T
thartmann 已提交
1656 1657
        return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex)
                          : StringUTF16.lastIndexOf(value, ch, fromIndex);
D
duke 已提交
1658 1659 1660 1661
    }

    /**
     * Returns the index within this string of the first occurrence of the
1662 1663
     * specified substring.
     *
1664 1665 1666 1667 1668
     * <p>The returned index is the smallest value {@code k} for which:
     * <pre>{@code
     * this.startsWith(str, k)
     * }</pre>
     * If no such value of {@code k} exists, then {@code -1} is returned.
D
duke 已提交
1669
     *
1670 1671 1672
     * @param   str   the substring to search for.
     * @return  the index of the first occurrence of the specified substring,
     *          or {@code -1} if there is no such occurrence.
D
duke 已提交
1673 1674
     */
    public int indexOf(String str) {
T
thartmann 已提交
1675 1676 1677 1678 1679 1680 1681 1682
        if (coder() == str.coder()) {
            return isLatin1() ? StringLatin1.indexOf(value, str.value)
                              : StringUTF16.indexOf(value, str.value);
        }
        if (coder() == LATIN1) {  // str.coder == UTF16
            return -1;
        }
        return StringUTF16.indexOfLatin1(value, str.value);
D
duke 已提交
1683 1684 1685 1686
    }

    /**
     * Returns the index within this string of the first occurrence of the
1687 1688
     * specified substring, starting at the specified index.
     *
1689 1690 1691 1692 1693 1694
     * <p>The returned index is the smallest value {@code k} for which:
     * <pre>{@code
     *     k >= Math.min(fromIndex, this.length()) &&
     *                   this.startsWith(str, k)
     * }</pre>
     * If no such value of {@code k} exists, then {@code -1} is returned.
D
duke 已提交
1695
     *
1696
     * @param   str         the substring to search for.
D
duke 已提交
1697
     * @param   fromIndex   the index from which to start the search.
1698 1699 1700
     * @return  the index of the first occurrence of the specified substring,
     *          starting at the specified index,
     *          or {@code -1} if there is no such occurrence.
D
duke 已提交
1701 1702
     */
    public int indexOf(String str, int fromIndex) {
T
thartmann 已提交
1703
        return indexOf(value, coder(), length(), str, fromIndex);
D
duke 已提交
1704 1705
    }

1706 1707 1708 1709 1710
    /**
     * Code shared by String and AbstractStringBuilder to do searches. The
     * source is the character array being searched, and the target
     * is the string being searched for.
     *
T
thartmann 已提交
1711 1712 1713 1714 1715
     * @param   src       the characters being searched.
     * @param   srcCoder  the coder of the source string.
     * @param   srcCount  length of the source string.
     * @param   tgtStr    the characters being searched for.
     * @param   fromIndex the index to begin searching from.
1716
     */
T
thartmann 已提交
1717 1718 1719 1720 1721 1722 1723 1724
    static int indexOf(byte[] src, byte srcCoder, int srcCount,
                       String tgtStr, int fromIndex) {
        byte[] tgt    = tgtStr.value;
        byte tgtCoder = tgtStr.coder();
        int tgtCount  = tgtStr.length();

        if (fromIndex >= srcCount) {
            return (tgtCount == 0 ? srcCount : -1);
D
duke 已提交
1725 1726 1727 1728
        }
        if (fromIndex < 0) {
            fromIndex = 0;
        }
T
thartmann 已提交
1729
        if (tgtCount == 0) {
D
duke 已提交
1730 1731
            return fromIndex;
        }
1732 1733 1734
        if (tgtCount > srcCount) {
            return -1;
        }
T
thartmann 已提交
1735 1736 1737 1738
        if (srcCoder == tgtCoder) {
            return srcCoder == LATIN1
                ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex)
                : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex);
D
duke 已提交
1739
        }
T
thartmann 已提交
1740 1741 1742 1743 1744
        if (srcCoder == LATIN1) {    //  && tgtCoder == UTF16
            return -1;
        }
        // srcCoder == UTF16 && tgtCoder == LATIN1) {
        return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
D
duke 已提交
1745 1746 1747
    }

    /**
1748 1749 1750 1751
     * Returns the index within this string of the last occurrence of the
     * specified substring.  The last occurrence of the empty string ""
     * is considered to occur at the index value {@code this.length()}.
     *
1752 1753 1754 1755 1756
     * <p>The returned index is the largest value {@code k} for which:
     * <pre>{@code
     * this.startsWith(str, k)
     * }</pre>
     * If no such value of {@code k} exists, then {@code -1} is returned.
D
duke 已提交
1757 1758
     *
     * @param   str   the substring to search for.
1759 1760
     * @return  the index of the last occurrence of the specified substring,
     *          or {@code -1} if there is no such occurrence.
D
duke 已提交
1761 1762
     */
    public int lastIndexOf(String str) {
T
thartmann 已提交
1763
        return lastIndexOf(str, length());
D
duke 已提交
1764 1765 1766 1767 1768
    }

    /**
     * Returns the index within this string of the last occurrence of the
     * specified substring, searching backward starting at the specified index.
1769
     *
1770 1771 1772 1773 1774 1775
     * <p>The returned index is the largest value {@code k} for which:
     * <pre>{@code
     *     k <= Math.min(fromIndex, this.length()) &&
     *                   this.startsWith(str, k)
     * }</pre>
     * If no such value of {@code k} exists, then {@code -1} is returned.
D
duke 已提交
1776 1777 1778
     *
     * @param   str         the substring to search for.
     * @param   fromIndex   the index to start the search from.
1779 1780 1781
     * @return  the index of the last occurrence of the specified substring,
     *          searching backward from the specified index,
     *          or {@code -1} if there is no such occurrence.
D
duke 已提交
1782 1783
     */
    public int lastIndexOf(String str, int fromIndex) {
T
thartmann 已提交
1784
        return lastIndexOf(value, coder(), length(), str, fromIndex);
D
duke 已提交
1785 1786
    }

1787 1788 1789 1790 1791
    /**
     * Code shared by String and AbstractStringBuilder to do searches. The
     * source is the character array being searched, and the target
     * is the string being searched for.
     *
T
thartmann 已提交
1792
     * @param   src         the characters being searched.
1793
     * @param   srcCoder    coder handles the mapping between bytes/chars
T
thartmann 已提交
1794 1795 1796 1797 1798 1799 1800 1801 1802
     * @param   srcCount    count of the source string.
     * @param   tgtStr      the characters being searched for.
     * @param   fromIndex   the index to begin searching from.
     */
    static int lastIndexOf(byte[] src, byte srcCoder, int srcCount,
                           String tgtStr, int fromIndex) {
        byte[] tgt = tgtStr.value;
        byte tgtCoder = tgtStr.coder();
        int tgtCount = tgtStr.length();
D
duke 已提交
1803 1804 1805 1806
        /*
         * Check arguments; return immediately where possible. For
         * consistency, don't check for null str.
         */
T
thartmann 已提交
1807
        int rightIndex = srcCount - tgtCount;
D
duke 已提交
1808 1809 1810
        if (fromIndex > rightIndex) {
            fromIndex = rightIndex;
        }
1811 1812 1813
        if (fromIndex < 0) {
            return -1;
        }
D
duke 已提交
1814
        /* Empty string always matches. */
T
thartmann 已提交
1815
        if (tgtCount == 0) {
D
duke 已提交
1816 1817
            return fromIndex;
        }
T
thartmann 已提交
1818 1819 1820 1821 1822 1823 1824 1825
        if (srcCoder == tgtCoder) {
            return srcCoder == LATIN1
                ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex)
                : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex);
        }
        if (srcCoder == LATIN1) {    // && tgtCoder == UTF16
            return -1;
        }
1826 1827
        // srcCoder == UTF16 && tgtCoder == LATIN1
        return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex);
D
duke 已提交
1828 1829 1830
    }

    /**
1831
     * Returns a string that is a substring of this string. The
D
duke 已提交
1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843
     * substring begins with the character at the specified index and
     * extends to the end of this string. <p>
     * Examples:
     * <blockquote><pre>
     * "unhappy".substring(2) returns "happy"
     * "Harbison".substring(3) returns "bison"
     * "emptiness".substring(9) returns "" (an empty string)
     * </pre></blockquote>
     *
     * @param      beginIndex   the beginning index, inclusive.
     * @return     the specified substring.
     * @exception  IndexOutOfBoundsException  if
1844 1845
     *             {@code beginIndex} is negative or larger than the
     *             length of this {@code String} object.
D
duke 已提交
1846 1847
     */
    public String substring(int beginIndex) {
T
thartmann 已提交
1848 1849
        if (beginIndex < 0) {
            throw new StringIndexOutOfBoundsException(beginIndex);
1850
        }
T
thartmann 已提交
1851
        int subLen = length() - beginIndex;
1852 1853 1854
        if (subLen < 0) {
            throw new StringIndexOutOfBoundsException(subLen);
        }
T
thartmann 已提交
1855 1856 1857 1858 1859
        if (beginIndex == 0) {
            return this;
        }
        return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
                          : StringUTF16.newString(value, beginIndex, subLen);
D
duke 已提交
1860 1861 1862
    }

    /**
1863
     * Returns a string that is a substring of this string. The
1864 1865 1866
     * substring begins at the specified {@code beginIndex} and
     * extends to the character at index {@code endIndex - 1}.
     * Thus the length of the substring is {@code endIndex-beginIndex}.
D
duke 已提交
1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877
     * <p>
     * Examples:
     * <blockquote><pre>
     * "hamburger".substring(4, 8) returns "urge"
     * "smiles".substring(1, 5) returns "mile"
     * </pre></blockquote>
     *
     * @param      beginIndex   the beginning index, inclusive.
     * @param      endIndex     the ending index, exclusive.
     * @return     the specified substring.
     * @exception  IndexOutOfBoundsException  if the
1878 1879 1880 1881 1882
     *             {@code beginIndex} is negative, or
     *             {@code endIndex} is larger than the length of
     *             this {@code String} object, or
     *             {@code beginIndex} is larger than
     *             {@code endIndex}.
D
duke 已提交
1883 1884
     */
    public String substring(int beginIndex, int endIndex) {
T
thartmann 已提交
1885 1886
        int length = length();
        checkBoundsBeginEnd(beginIndex, endIndex, length);
1887
        int subLen = endIndex - beginIndex;
T
thartmann 已提交
1888 1889
        if (beginIndex == 0 && endIndex == length) {
            return this;
D
duke 已提交
1890
        }
T
thartmann 已提交
1891 1892
        return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
                          : StringUTF16.newString(value, beginIndex, subLen);
D
duke 已提交
1893 1894 1895
    }

    /**
1896
     * Returns a character sequence that is a subsequence of this sequence.
D
duke 已提交
1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907
     *
     * <p> An invocation of this method of the form
     *
     * <blockquote><pre>
     * str.subSequence(begin,&nbsp;end)</pre></blockquote>
     *
     * behaves in exactly the same way as the invocation
     *
     * <blockquote><pre>
     * str.substring(begin,&nbsp;end)</pre></blockquote>
     *
1908
     * @apiNote
1909
     * This method is defined so that the {@code String} class can implement
1910
     * the {@link CharSequence} interface.
D
duke 已提交
1911
     *
1912 1913 1914
     * @param   beginIndex   the begin index, inclusive.
     * @param   endIndex     the end index, exclusive.
     * @return  the specified subsequence.
D
duke 已提交
1915 1916
     *
     * @throws  IndexOutOfBoundsException
1917 1918 1919
     *          if {@code beginIndex} or {@code endIndex} is negative,
     *          if {@code endIndex} is greater than {@code length()},
     *          or if {@code beginIndex} is greater than {@code endIndex}
D
duke 已提交
1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930
     *
     * @since 1.4
     * @spec JSR-51
     */
    public CharSequence subSequence(int beginIndex, int endIndex) {
        // Delegates to substring; the resulting String is returned as a CharSequence.
        return this.substring(beginIndex, endIndex);
    }

    /**
     * Concatenates the specified string to the end of this string.
     * <p>
1931
     * If the length of the argument string is {@code 0}, then this
1932 1933
     * {@code String} object is returned. Otherwise, a
     * {@code String} object is returned that represents a character
D
duke 已提交
1934
     * sequence that is the concatenation of the character sequence
1935
     * represented by this {@code String} object and the character
D
duke 已提交
1936 1937 1938 1939 1940 1941 1942
     * sequence represented by the argument string.<p>
     * Examples:
     * <blockquote><pre>
     * "cares".concat("s") returns "caress"
     * "to".concat("get").concat("her") returns "together"
     * </pre></blockquote>
     *
1943 1944
     * @param   str   the {@code String} that is concatenated to the end
     *                of this {@code String}.
D
duke 已提交
1945 1946 1947 1948
     * @return  a string that represents the concatenation of this object's
     *          characters followed by the string argument's characters.
     */
    public String concat(String str) {
T
thartmann 已提交
1949 1950
        int olen = str.length();
        if (olen == 0) {
D
duke 已提交
1951 1952
            return this;
        }
T
thartmann 已提交
1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965
        if (coder() == str.coder()) {
            byte[] val = this.value;
            byte[] oval = str.value;
            int len = val.length + oval.length;
            byte[] buf = Arrays.copyOf(val, len);
            System.arraycopy(oval, 0, buf, val.length, oval.length);
            return new String(buf, coder);
        }
        int len = length();
        byte[] buf = StringUTF16.newBytesFor(len + olen);
        getBytes(buf, 0, UTF16);
        str.getBytes(buf, len, UTF16);
        return new String(buf, UTF16);
D
duke 已提交
1966 1967 1968
    }

    /**
1969
     * Returns a string resulting from replacing all occurrences of
1970
     * {@code oldChar} in this string with {@code newChar}.
D
duke 已提交
1971
     * <p>
1972 1973 1974
     * If the character {@code oldChar} does not occur in the
     * character sequence represented by this {@code String} object,
     * then a reference to this {@code String} object is returned.
1975
     * Otherwise, a {@code String} object is returned that
D
duke 已提交
1976
     * represents a character sequence identical to the character sequence
1977 1978 1979
     * represented by this {@code String} object, except that every
     * occurrence of {@code oldChar} is replaced by an occurrence
     * of {@code newChar}.
D
duke 已提交
1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994
     * <p>
     * Examples:
     * <blockquote><pre>
     * "mesquite in your cellar".replace('e', 'o')
     *         returns "mosquito in your collar"
     * "the war of baronets".replace('r', 'y')
     *         returns "the way of bayonets"
     * "sparring with a purple porpoise".replace('p', 't')
     *         returns "starring with a turtle tortoise"
     * "JonL".replace('q', 'x') returns "JonL" (no change)
     * </pre></blockquote>
     *
     * @param   oldChar   the old character.
     * @param   newChar   the new character.
     * @return  a string derived from this string by replacing every
1995
     *          occurrence of {@code oldChar} with {@code newChar}.
D
duke 已提交
1996 1997 1998
     */
    public String replace(char oldChar, char newChar) {
        if (oldChar != newChar) {
T
thartmann 已提交
1999 2000 2001 2002
            String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar)
                                    : StringUTF16.replace(value, oldChar, newChar);
            if (ret != null) {
                return ret;
D
duke 已提交
2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
            }
        }
        return this;
    }

    /**
     * Tells whether or not this string matches the given <a
     * href="../util/regex/Pattern.html#sum">regular expression</a>.
     *
     * <p> An invocation of this method of the form
J
jgish 已提交
2013
     * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the
D
duke 已提交
2014 2015
     * same result as the expression
     *
J
jgish 已提交
2016 2017 2018 2019
     * <blockquote>
     * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence)
     * matches(<i>regex</i>, <i>str</i>)}
     * </blockquote>
D
duke 已提交
2020 2021 2022 2023
     *
     * @param   regex
     *          the regular expression to which this string is to be matched
     *
J
jgish 已提交
2024
     * @return  {@code true} if, and only if, this string matches the
D
duke 已提交
2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043
     *          given regular expression
     *
     * @throws  PatternSyntaxException
     *          if the regular expression's syntax is invalid
     *
     * @see java.util.regex.Pattern
     *
     * @since 1.4
     * @spec JSR-51
     */
    public boolean matches(String regex) {
        // Compile the expression, then require the whole string to match it.
        final Pattern compiled = Pattern.compile(regex);
        return compiled.matcher(this).matches();
    }

    /**
     * Returns true if and only if this string contains the specified
     * sequence of char values.
     *
     * @param s the sequence to search for
2044
     * @return true if this string contains {@code s}, false otherwise
D
duke 已提交
2045 2046 2047
     * @since 1.5
     */
    public boolean contains(CharSequence s) {
2048
        return indexOf(s.toString()) >= 0;
D
duke 已提交
2049 2050 2051 2052 2053 2054 2055 2056
    }

    /**
     * Replaces the first substring of this string that matches the given <a
     * href="../util/regex/Pattern.html#sum">regular expression</a> with the
     * given replacement.
     *
     * <p> An invocation of this method of the form
J
jgish 已提交
2057
     * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
D
duke 已提交
2058 2059
     * yields exactly the same result as the expression
     *
J
jgish 已提交
2060 2061 2062 2063 2064 2065 2066 2067
     * <blockquote>
     * <code>
     * {@link java.util.regex.Pattern}.{@link
     * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link
     * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
     * java.util.regex.Matcher#replaceFirst replaceFirst}(<i>repl</i>)
     * </code>
     * </blockquote>
D
duke 已提交
2068 2069
     *
     *<p>
J
jgish 已提交
2070
     * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
D
duke 已提交
2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081
     * replacement string may cause the results to be different than if it were
     * being treated as a literal replacement string; see
     * {@link java.util.regex.Matcher#replaceFirst}.
     * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
     * meaning of these characters, if desired.
     *
     * @param   regex
     *          the regular expression to which this string is to be matched
     * @param   replacement
     *          the string to be substituted for the first match
     *
J
jgish 已提交
2082
     * @return  The resulting {@code String}
D
duke 已提交
2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101
     *
     * @throws  PatternSyntaxException
     *          if the regular expression's syntax is invalid
     *
     * @see java.util.regex.Pattern
     *
     * @since 1.4
     * @spec JSR-51
     */
    public String replaceFirst(String regex, String replacement) {
        // Compile, bind to this string, and substitute only the first match.
        final Pattern compiled = Pattern.compile(regex);
        final Matcher matcher = compiled.matcher(this);
        return matcher.replaceFirst(replacement);
    }

    /**
     * Replaces each substring of this string that matches the given <a
     * href="../util/regex/Pattern.html#sum">regular expression</a> with the
     * given replacement.
     *
     * <p> An invocation of this method of the form
J
jgish 已提交
2102
     * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )}
D
duke 已提交
2103 2104
     * yields exactly the same result as the expression
     *
J
jgish 已提交
2105 2106 2107 2108 2109 2110 2111 2112
     * <blockquote>
     * <code>
     * {@link java.util.regex.Pattern}.{@link
     * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link
     * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link
     * java.util.regex.Matcher#replaceAll replaceAll}(<i>repl</i>)
     * </code>
     * </blockquote>
D
duke 已提交
2113 2114
     *
     *<p>
J
jgish 已提交
2115
     * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the
D
duke 已提交
2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126
     * replacement string may cause the results to be different than if it were
     * being treated as a literal replacement string; see
     * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}.
     * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special
     * meaning of these characters, if desired.
     *
     * @param   regex
     *          the regular expression to which this string is to be matched
     * @param   replacement
     *          the string to be substituted for each match
     *
J
jgish 已提交
2127
     * @return  The resulting {@code String}
D
duke 已提交
2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153
     *
     * @throws  PatternSyntaxException
     *          if the regular expression's syntax is invalid
     *
     * @see java.util.regex.Pattern
     *
     * @since 1.4
     * @spec JSR-51
     */
    public String replaceAll(String regex, String replacement) {
        // Compile, bind to this string, and substitute every match.
        final Pattern compiled = Pattern.compile(regex);
        final Matcher matcher = compiled.matcher(this);
        return matcher.replaceAll(replacement);
    }

    /**
     * Replaces each substring of this string that matches the literal target
     * sequence with the specified literal replacement sequence. The
     * replacement proceeds from the beginning of the string to the end, for
     * example, replacing "aa" with "b" in the string "aaa" will result in
     * "ba" rather than "ab".
     *
     * @param  target The sequence of char values to be replaced
     * @param  replacement The replacement sequence of char values
     * @return  The resulting string
     * @since 1.5
     */
    public String replace(CharSequence target, CharSequence replacement) {
T
thartmann 已提交
2154 2155 2156
        String tgtStr = target.toString();
        String replStr = replacement.toString();
        int j = indexOf(tgtStr);
2157 2158 2159
        if (j < 0) {
            return this;
        }
T
thartmann 已提交
2160 2161 2162 2163 2164
        int tgtLen = tgtStr.length();
        int tgtLen1 = Math.max(tgtLen, 1);
        int thisLen = length();

        int newLenHint = thisLen - tgtLen + replStr.length();
2165 2166 2167 2168 2169 2170
        if (newLenHint < 0) {
            throw new OutOfMemoryError();
        }
        StringBuilder sb = new StringBuilder(newLenHint);
        int i = 0;
        do {
T
thartmann 已提交
2171 2172 2173 2174
            sb.append(this, i, j).append(replStr);
            i = j + tgtLen;
        } while (j < thisLen && (j = indexOf(tgtStr, j + tgtLen1)) > 0);
        return sb.append(this, i, thisLen).toString();
D
duke 已提交
2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185
    }

    /**
     * Splits this string around matches of the given
     * <a href="../util/regex/Pattern.html#sum">regular expression</a>.
     *
     * <p> The array returned by this method contains each substring of this
     * string that is terminated by another substring that matches the given
     * expression or is terminated by the end of the string.  The substrings in
     * the array are in the order in which they occur in this string.  If the
     * expression does not match any part of the input then the resulting array
2186
     * has just one element, namely this string.
2187 2188 2189 2190 2191
     *
     * <p> When there is a positive-width match at the beginning of this
     * string then an empty leading substring is included at the beginning
     * of the resulting array. A zero-width match at the beginning however
     * never produces such empty leading substring.
D
duke 已提交
2192
     *
J
jgish 已提交
2193
     * <p> The {@code limit} parameter controls the number of times the
D
duke 已提交
2194 2195 2196 2197 2198 2199 2200 2201 2202 2203
     * pattern is applied and therefore affects the length of the resulting
     * array.  If the limit <i>n</i> is greater than zero then the pattern
     * will be applied at most <i>n</i>&nbsp;-&nbsp;1 times, the array's
     * length will be no greater than <i>n</i>, and the array's last entry
     * will contain all input beyond the last matched delimiter.  If <i>n</i>
     * is non-positive then the pattern will be applied as many times as
     * possible and the array can have any length.  If <i>n</i> is zero then
     * the pattern will be applied as many times as possible, the array can
     * have any length, and trailing empty strings will be discarded.
     *
J
jgish 已提交
2204
     * <p> The string {@code "boo:and:foo"}, for example, yields the
D
duke 已提交
2205 2206
     * following results with these parameters:
     *
2207 2208 2209
     * <blockquote><table class="plain">
     * <caption style="display:none">Split example showing regex, limit, and result</caption>
     * <thead>
D
duke 已提交
2210
     * <tr>
2211 2212 2213
     *     <th scope="col">Regex</th>
     *     <th scope="col">Limit</th>
     *     <th scope="col">Result</th>
D
duke 已提交
2214
     * </tr>
2215 2216
     * </thead>
     * <tbody>
2217 2218
     * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th>
     *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th>
J
jgish 已提交
2219
     *     <td>{@code { "boo", "and:foo" }}</td></tr>
2220 2221
     * <tr><!-- : -->
     *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
J
jgish 已提交
2222
     *     <td>{@code { "boo", "and", "foo" }}</td></tr>
2223 2224
     * <tr><!-- : -->
     *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
J
jgish 已提交
2225
     *     <td>{@code { "boo", "and", "foo" }}</td></tr>
2226 2227
     * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th>
     *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th>
J
jgish 已提交
2228
     *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
2229 2230
     * <tr><!-- o -->
     *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th>
J
jgish 已提交
2231
     *     <td>{@code { "b", "", ":and:f", "", "" }}</td></tr>
2232 2233
     * <tr><!-- o -->
     *     <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th>
J
jgish 已提交
2234
     *     <td>{@code { "b", "", ":and:f" }}</td></tr>
2235
     * </tbody>
D
duke 已提交
2236 2237 2238
     * </table></blockquote>
     *
     * <p> An invocation of this method of the form
J
jgish 已提交
2239
     * <i>str.</i>{@code split(}<i>regex</i>{@code ,}&nbsp;<i>n</i>{@code )}
D
duke 已提交
2240 2241 2242
     * yields the same result as the expression
     *
     * <blockquote>
J
jgish 已提交
2243 2244 2245 2246 2247
     * <code>
     * {@link java.util.regex.Pattern}.{@link
     * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link
     * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>,&nbsp;<i>n</i>)
     * </code>
D
duke 已提交
2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268
     * </blockquote>
     *
     *
     * @param  regex
     *         the delimiting regular expression
     *
     * @param  limit
     *         the result threshold, as described above
     *
     * @return  the array of strings computed by splitting this string
     *          around matches of the given regular expression
     *
     * @throws  PatternSyntaxException
     *          if the regular expression's syntax is invalid
     *
     * @see java.util.regex.Pattern
     *
     * @since 1.4
     * @spec JSR-51
     */
    public String[] split(String regex, int limit) {
2269
        /* fastpath if the regex is a
2270 2271 2272 2273 2274
         (1)one-char String and this character is not one of the
            RegEx's meta characters ".$|()[{^?*+\\", or
         (2)two-char String and the first char is the backslash and
            the second is not the ascii digit or ascii letter.
         */
2275
        char ch = 0;
T
thartmann 已提交
2276
        if (((regex.length() == 1 &&
2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288
             ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
             (regex.length() == 2 &&
              regex.charAt(0) == '\\' &&
              (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
              ((ch-'a')|('z'-ch)) < 0 &&
              ((ch-'A')|('Z'-ch)) < 0)) &&
            (ch < Character.MIN_HIGH_SURROGATE ||
             ch > Character.MAX_LOW_SURROGATE))
        {
            int off = 0;
            int next = 0;
            boolean limited = limit > 0;
2289
            ArrayList<String> list = new ArrayList<>();
2290 2291 2292 2293 2294 2295
            while ((next = indexOf(ch, off)) != -1) {
                if (!limited || list.size() < limit - 1) {
                    list.add(substring(off, next));
                    off = next + 1;
                } else {    // last one
                    //assert (list.size() == limit - 1);
T
thartmann 已提交
2296 2297 2298
                    int last = length();
                    list.add(substring(off, last));
                    off = last;
2299 2300 2301 2302 2303
                    break;
                }
            }
            // If no match was found, return this
            if (off == 0)
2304
                return new String[]{this};
2305 2306 2307

            // Add remaining segment
            if (!limited || list.size() < limit)
T
thartmann 已提交
2308
                list.add(substring(off, length()));
2309 2310 2311

            // Construct result
            int resultSize = list.size();
2312 2313
            if (limit == 0) {
                while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
2314
                    resultSize--;
2315 2316
                }
            }
2317 2318 2319
            String[] result = new String[resultSize];
            return list.subList(0, resultSize).toArray(result);
        }
D
duke 已提交
2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331
        return Pattern.compile(regex).split(this, limit);
    }

    /**
     * Splits this string around matches of the given <a
     * href="../util/regex/Pattern.html#sum">regular expression</a>.
     *
     * <p> This method works as if by invoking the two-argument {@link
     * #split(String, int) split} method with the given expression and a limit
     * argument of zero.  Trailing empty strings are therefore not included in
     * the resulting array.
     *
J
jgish 已提交
2332
     * <p> The string {@code "boo:and:foo"}, for example, yields the following
D
duke 已提交
2333 2334
     * results with these expressions:
     *
2335 2336 2337
     * <blockquote><table class="plain">
     * <caption style="display:none">Split examples showing regex and result</caption>
     * <thead>
D
duke 已提交
2338
     * <tr>
2339 2340
     *  <th scope="col">Regex</th>
     *  <th scope="col">Result</th>
D
duke 已提交
2341
     * </tr>
2342 2343
     * </thead>
     * <tbody>
2344
     * <tr><th scope="row" style="font-weight:normal">:</th>
J
jgish 已提交
2345
     *     <td>{@code { "boo", "and", "foo" }}</td></tr>
2346
     * <tr><th scope="row" style="font-weight:normal">o</th>
J
jgish 已提交
2347
     *     <td>{@code { "b", "", ":and:f" }}</td></tr>
2348
     * </tbody>
D
duke 已提交
2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369
     * </table></blockquote>
     *
     *
     * @param  regex
     *         the delimiting regular expression
     *
     * @return  the array of strings computed by splitting this string
     *          around matches of the given regular expression
     *
     * @throws  PatternSyntaxException
     *          if the regular expression's syntax is invalid
     *
     * @see java.util.regex.Pattern
     *
     * @since 1.4
     * @spec JSR-51
     */
    public String[] split(String regex) {
        // Delegate to the two-argument overload with a limit of zero.
        final int zeroLimit = 0;
        return split(regex, zeroLimit);
    }

2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412
    /**
     * Returns a new String composed of copies of the
     * {@code CharSequence elements} joined together with a copy of
     * the specified {@code delimiter}.
     *
     * <blockquote>For example,
     * <pre>{@code
     *     String message = String.join("-", "Java", "is", "cool");
     *     // message returned is: "Java-is-cool"
     * }</pre></blockquote>
     *
     * Note that if an element is null, then {@code "null"} is added.
     *
     * @param  delimiter the delimiter that separates each element
     * @param  elements the elements to join together.
     *
     * @return a new {@code String} that is composed of the {@code elements}
     *         separated by the {@code delimiter}
     *
     * @throws NullPointerException If {@code delimiter} or {@code elements}
     *         is {@code null}
     *
     * @see java.util.StringJoiner
     * @since 1.8
     */
    public static String join(CharSequence delimiter, CharSequence... elements) {
        Objects.requireNonNull(delimiter);
        Objects.requireNonNull(elements);
        // A plain indexed loop: the element count is not likely to be
        // worth the overhead of Arrays.stream.
        final StringJoiner result = new StringJoiner(delimiter);
        for (int idx = 0; idx < elements.length; idx++) {
            result.add(elements[idx]);
        }
        return result.toString();
    }

    /**
     * Returns a new {@code String} composed of copies of the
     * {@code CharSequence elements} joined together with a copy of the
     * specified {@code delimiter}.
     *
     * <blockquote>For example,
     * <pre>{@code
2413
     *     List<String> strings = List.of("Java", "is", "cool");
2414 2415 2416
     *     String message = String.join(" ", strings);
     *     //message returned is: "Java is cool"
     *
2417 2418
     *     Set<String> strings =
     *         new LinkedHashSet<>(List.of("Java", "is", "very", "cool"));
2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450
     *     String message = String.join("-", strings);
     *     //message returned is: "Java-is-very-cool"
     * }</pre></blockquote>
     *
     * Note that if an individual element is {@code null}, then {@code "null"} is added.
     *
     * @param  delimiter a sequence of characters that is used to separate each
     *         of the {@code elements} in the resulting {@code String}
     * @param  elements an {@code Iterable} that will have its {@code elements}
     *         joined together.
     *
     * @return a new {@code String} that is composed from the {@code elements}
     *         argument
     *
     * @throws NullPointerException If {@code delimiter} or {@code elements}
     *         is {@code null}
     *
     * @see    #join(CharSequence,CharSequence...)
     * @see    java.util.StringJoiner
     * @since 1.8
     */
    public static String join(CharSequence delimiter,
            Iterable<? extends CharSequence> elements) {
        Objects.requireNonNull(delimiter);
        Objects.requireNonNull(elements);
        final StringJoiner result = new StringJoiner(delimiter);
        // Walk the iterable explicitly, feeding each element to the joiner.
        final java.util.Iterator<? extends CharSequence> cursor = elements.iterator();
        while (cursor.hasNext()) {
            result.add(cursor.next());
        }
        return result.toString();
    }

D
duke 已提交
2451
    /**
2452 2453
     * Converts all of the characters in this {@code String} to lower
     * case using the rules of the given {@code Locale}.  Case mapping is based
D
duke 已提交
2454 2455
     * on the Unicode Standard version specified by the {@link java.lang.Character Character}
     * class. Since case mappings are not always 1:1 char mappings, the resulting
2456
     * {@code String} may be a different length than the original {@code String}.
D
duke 已提交
2457 2458
     * <p>
     * Examples of lowercase  mappings are in the following table:
2459 2460 2461
     * <table class="plain">
     * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption>
     * <thead>
D
duke 已提交
2462
     * <tr>
2463 2464 2465 2466
     *   <th scope="col">Language Code of Locale</th>
     *   <th scope="col">Upper Case</th>
     *   <th scope="col">Lower Case</th>
     *   <th scope="col">Description</th>
D
duke 已提交
2467
     * </tr>
2468 2469
     * </thead>
     * <tbody>
D
duke 已提交
2470 2471
     * <tr>
     *   <td>tr (Turkish)</td>
2472
     *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0130</th>
D
duke 已提交
2473 2474 2475 2476 2477
     *   <td>&#92;u0069</td>
     *   <td>capital letter I with dot above -&gt; small letter i</td>
     * </tr>
     * <tr>
     *   <td>tr (Turkish)</td>
2478
     *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0049</th>
D
duke 已提交
2479 2480 2481 2482 2483
     *   <td>&#92;u0131</td>
     *   <td>capital letter I -&gt; small letter dotless i </td>
     * </tr>
     * <tr>
     *   <td>(all)</td>
2484
     *   <th scope="row" style="font-weight:normal; text-align:left">French Fries</th>
D
duke 已提交
2485 2486 2487 2488 2489
     *   <td>french fries</td>
     *   <td>lowercased all chars in String</td>
     * </tr>
     * <tr>
     *   <td>(all)</td>
2490
     *   <th scope="row" style="font-weight:normal; text-align:left">
2491 2492
     *       &Iota;&Chi;&Theta;&Upsilon;&Sigma;</th>
     *   <td>&iota;&chi;&theta;&upsilon;&sigma;</td>
D
duke 已提交
2493 2494
     *   <td>lowercased all chars in String</td>
     * </tr>
2495
     * </tbody>
D
duke 已提交
2496 2497 2498
     * </table>
     *
     * @param locale use the case transformation rules for this locale
2499
     * @return the {@code String}, converted to lowercase.
D
duke 已提交
2500 2501 2502 2503 2504 2505
     * @see     java.lang.String#toLowerCase()
     * @see     java.lang.String#toUpperCase()
     * @see     java.lang.String#toUpperCase(Locale)
     * @since   1.1
     */
    public String toLowerCase(Locale locale) {
T
thartmann 已提交
2506 2507
        return isLatin1() ? StringLatin1.toLowerCase(this, value, locale)
                          : StringUTF16.toLowerCase(this, value, locale);
D
duke 已提交
2508 2509 2510
    }

    /**
2511
     * Converts all of the characters in this {@code String} to lower
D
duke 已提交
2512
     * case using the rules of the default locale. This is equivalent to calling
2513
     * {@code toLowerCase(Locale.getDefault())}.
D
duke 已提交
2514 2515 2516 2517 2518 2519
     * <p>
     * <b>Note:</b> This method is locale sensitive, and may produce unexpected
     * results if used for strings that are intended to be interpreted locale
     * independently.
     * Examples are programming language identifiers, protocol keys, and HTML
     * tags.
2520 2521
     * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale
     * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the
M
martin 已提交
2522
     * LATIN SMALL LETTER DOTLESS I character.
D
duke 已提交
2523
     * To obtain correct results for locale insensitive strings, use
2524
     * {@code toLowerCase(Locale.ROOT)}.
2525
     *
2526
     * @return  the {@code String}, converted to lowercase.
D
duke 已提交
2527 2528 2529 2530 2531 2532 2533
     * @see     java.lang.String#toLowerCase(Locale)
     */
    public String toLowerCase() {
        // Use whatever locale is currently the default.
        final Locale defaultLocale = Locale.getDefault();
        return toLowerCase(defaultLocale);
    }

    /**
2534 2535
     * Converts all of the characters in this {@code String} to upper
     * case using the rules of the given {@code Locale}. Case mapping is based
D
duke 已提交
2536 2537
     * on the Unicode Standard version specified by the {@link java.lang.Character Character}
     * class. Since case mappings are not always 1:1 char mappings, the resulting
2538
     * {@code String} may be a different length than the original {@code String}.
D
duke 已提交
2539 2540
     * <p>
     * Examples of locale-sensitive and 1:M case mappings are in the following table.
R
rriggs 已提交
2541
     *
2542 2543 2544
     * <table class="plain">
     * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption>
     * <thead>
D
duke 已提交
2545
     * <tr>
2546 2547 2548 2549
     *   <th scope="col">Language Code of Locale</th>
     *   <th scope="col">Lower Case</th>
     *   <th scope="col">Upper Case</th>
     *   <th scope="col">Description</th>
D
duke 已提交
2550
     * </tr>
2551 2552
     * </thead>
     * <tbody>
D
duke 已提交
2553 2554
     * <tr>
     *   <td>tr (Turkish)</td>
2555
     *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0069</th>
D
duke 已提交
2556 2557 2558 2559 2560
     *   <td>&#92;u0130</td>
     *   <td>small letter i -&gt; capital letter I with dot above</td>
     * </tr>
     * <tr>
     *   <td>tr (Turkish)</td>
2561
     *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u0131</th>
D
duke 已提交
2562 2563 2564 2565 2566
     *   <td>&#92;u0049</td>
     *   <td>small letter dotless i -&gt; capital letter I</td>
     * </tr>
     * <tr>
     *   <td>(all)</td>
2567
     *   <th scope="row" style="font-weight:normal; text-align:left">&#92;u00df</th>
D
duke 已提交
2568 2569 2570 2571 2572
     *   <td>&#92;u0053 &#92;u0053</td>
     *   <td>small letter sharp s -&gt; two letters: SS</td>
     * </tr>
     * <tr>
     *   <td>(all)</td>
2573
     *   <th scope="row" style="font-weight:normal; text-align:left">Fahrvergn&uuml;gen</th>
D
duke 已提交
2574 2575 2576
     *   <td>FAHRVERGN&Uuml;GEN</td>
     *   <td></td>
     * </tr>
2577
     * </tbody>
D
duke 已提交
2578 2579
     * </table>
     * @param locale use the case transformation rules for this locale
2580
     * @return the {@code String}, converted to uppercase.
D
duke 已提交
2581 2582 2583 2584 2585 2586
     * @see     java.lang.String#toUpperCase()
     * @see     java.lang.String#toLowerCase()
     * @see     java.lang.String#toLowerCase(Locale)
     * @since   1.1
     */
    public String toUpperCase(Locale locale) {
T
thartmann 已提交
2587 2588
        return isLatin1() ? StringLatin1.toUpperCase(this, value, locale)
                          : StringUTF16.toUpperCase(this, value, locale);
D
duke 已提交
2589 2590 2591
    }

    /**
2592
     * Converts all of the characters in this {@code String} to upper
D
duke 已提交
2593
     * case using the rules of the default locale. This method is equivalent to
2594
     * {@code toUpperCase(Locale.getDefault())}.
D
duke 已提交
2595 2596 2597 2598 2599 2600
     * <p>
     * <b>Note:</b> This method is locale sensitive, and may produce unexpected
     * results if used for strings that are intended to be interpreted locale
     * independently.
     * Examples are programming language identifiers, protocol keys, and HTML
     * tags.
2601 2602
     * For instance, {@code "title".toUpperCase()} in a Turkish locale
     * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the
M
martin 已提交
2603
     * LATIN CAPITAL LETTER I WITH DOT ABOVE character.
D
duke 已提交
2604
     * To obtain correct results for locale insensitive strings, use
2605
     * {@code toUpperCase(Locale.ROOT)}.
2606
     *
2607
     * @return  the {@code String}, converted to uppercase.
D
duke 已提交
2608 2609 2610 2611 2612 2613 2614
     * @see     java.lang.String#toUpperCase(Locale)
     */
    public String toUpperCase() {
        // Use whatever locale is currently the default.
        final Locale defaultLocale = Locale.getDefault();
        return toUpperCase(defaultLocale);
    }

    /**
2615 2616
     * Returns a string whose value is this string, with any leading and trailing
     * whitespace removed.
D
duke 已提交
2617
     * <p>
2618
     * If this {@code String} object represents an empty character
D
duke 已提交
2619
     * sequence, or the first and last characters of character sequence
2620 2621 2622
     * represented by this {@code String} object both have codes
     * greater than {@code '\u005Cu0020'} (the space character), then a
     * reference to this {@code String} object is returned.
D
duke 已提交
2623 2624
     * <p>
     * Otherwise, if there is no character with a code greater than
2625 2626 2627
     * {@code '\u005Cu0020'} in the string, then a
     * {@code String} object representing an empty string is
     * returned.
D
duke 已提交
2628 2629
     * <p>
     * Otherwise, let <i>k</i> be the index of the first character in the
2630
     * string whose code is greater than {@code '\u005Cu0020'}, and let
D
duke 已提交
2631
     * <i>m</i> be the index of the last character in the string whose code
2632 2633
     * is greater than {@code '\u005Cu0020'}. A {@code String}
     * object is returned, representing the substring of this string that
D
duke 已提交
2634 2635
     * begins with the character at index <i>k</i> and ends with the
     * character at index <i>m</i>-that is, the result of
2636
     * {@code this.substring(k, m + 1)}.
D
duke 已提交
2637 2638 2639 2640
     * <p>
     * This method may be used to trim whitespace (as defined above) from
     * the beginning and end of a string.
     *
2641
     * @return  A string whose value is this string, with any leading and trailing white
D
duke 已提交
2642 2643 2644 2645
     *          space removed, or this string if it has no leading or
     *          trailing white space.
     */
    public String trim() {
T
thartmann 已提交
2646 2647 2648
        String ret = isLatin1() ? StringLatin1.trim(value)
                                : StringUTF16.trim(value);
        return ret == null ? this : ret;
D
duke 已提交
2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659
    }

    /**
     * This object (which is already a string!) is itself returned.
     *
     * @return  the string itself.
     */
    @Override
    public String toString() {
        // Already a String: return this same instance rather than a copy.
        return this;
    }

2660 2661 2662 2663 2664 2665 2666
    /**
     * Returns a stream of {@code int} zero-extending the {@code char} values
     * from this sequence.  Any char which maps to a <a
     * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code
     * point</a> is passed through uninterpreted.
     *
     * @return an IntStream of char values from this sequence
2667
     * @since 9
2668 2669 2670 2671
     */
    @Override
    public IntStream chars() {
        return StreamSupport.intStream(
T
thartmann 已提交
2672 2673 2674
            isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
                       : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE),
            false);
2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686
    }


    /**
     * Returns a stream of code point values from this sequence.  Any surrogate
     * pairs encountered in the sequence are combined as if by {@linkplain
     * Character#toCodePoint Character.toCodePoint} and the result is passed
     * to the stream. Any other code units, including ordinary BMP characters,
     * unpaired surrogates, and undefined code units, are zero-extended to
     * {@code int} values which are then passed to the stream.
     *
     * @return an IntStream of Unicode code points from this sequence
2687
     * @since 9
2688 2689 2690 2691
     */
    @Override
    public IntStream codePoints() {
        return StreamSupport.intStream(
T
thartmann 已提交
2692 2693 2694
            isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE)
                       : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE),
            false);
2695 2696
    }

D
duke 已提交
2697 2698 2699 2700 2701 2702 2703 2704
    /**
     * Converts this string to a new character array.
     *
     * @return  a newly allocated character array whose length is the length
     *          of this string and whose contents are initialized to contain
     *          the character sequence represented by this string.
     */
    public char[] toCharArray() {
T
thartmann 已提交
2705 2706
        return isLatin1() ? StringLatin1.toChars(value)
                          : StringUTF16.toChars(value);
D
duke 已提交
2707 2708 2709 2710 2711 2712 2713
    }

    /**
     * Returns a formatted string using the specified format string and
     * arguments.
     *
     * <p> The locale always used is the one returned by {@link
2714 2715 2716
     * java.util.Locale#getDefault(java.util.Locale.Category)
     * Locale.getDefault(Locale.Category)} with
     * {@link java.util.Locale.Category#FORMAT FORMAT} category specified.
D
duke 已提交
2717 2718 2719 2720 2721 2722 2723 2724 2725 2726
     *
     * @param  format
     *         A <a href="../util/Formatter.html#syntax">format string</a>
     *
     * @param  args
     *         Arguments referenced by the format specifiers in the format
     *         string.  If there are more arguments than format specifiers, the
     *         extra arguments are ignored.  The number of arguments is
     *         variable and may be zero.  The maximum number of arguments is
     *         limited by the maximum dimension of a Java array as defined by
2727 2728
     *         <cite>The Java&trade; Virtual Machine Specification</cite>.
     *         The behaviour on a
J
jgish 已提交
2729
     *         {@code null} argument depends on the <a
D
duke 已提交
2730 2731
     *         href="../util/Formatter.html#syntax">conversion</a>.
     *
A
alanb 已提交
2732
     * @throws  java.util.IllegalFormatException
D
duke 已提交
2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745
     *          If a format string contains an illegal syntax, a format
     *          specifier that is incompatible with the given arguments,
     *          insufficient arguments given the format string, or other
     *          illegal conditions.  For specification of all possible
     *          formatting errors, see the <a
     *          href="../util/Formatter.html#detail">Details</a> section of the
     *          formatter class specification.
     *
     * @return  A formatted string
     *
     * @see  java.util.Formatter
     * @since  1.5
     */
2746
    public static String format(String format, Object... args) {
D
duke 已提交
2747 2748 2749 2750 2751 2752 2753 2754 2755
        return new Formatter().format(format, args).toString();
    }

    /**
     * Returns a formatted string using the specified locale, format string,
     * and arguments.
     *
     * @param  l
     *         The {@linkplain java.util.Locale locale} to apply during
J
jgish 已提交
2756
     *         formatting.  If {@code l} is {@code null} then no localization
D
duke 已提交
2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767
     *         is applied.
     *
     * @param  format
     *         A <a href="../util/Formatter.html#syntax">format string</a>
     *
     * @param  args
     *         Arguments referenced by the format specifiers in the format
     *         string.  If there are more arguments than format specifiers, the
     *         extra arguments are ignored.  The number of arguments is
     *         variable and may be zero.  The maximum number of arguments is
     *         limited by the maximum dimension of a Java array as defined by
2768 2769
     *         <cite>The Java&trade; Virtual Machine Specification</cite>.
     *         The behaviour on a
2770 2771
     *         {@code null} argument depends on the
     *         <a href="../util/Formatter.html#syntax">conversion</a>.
D
duke 已提交
2772
     *
A
alanb 已提交
2773
     * @throws  java.util.IllegalFormatException
D
duke 已提交
2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786
     *          If a format string contains an illegal syntax, a format
     *          specifier that is incompatible with the given arguments,
     *          insufficient arguments given the format string, or other
     *          illegal conditions.  For specification of all possible
     *          formatting errors, see the <a
     *          href="../util/Formatter.html#detail">Details</a> section of the
     *          formatter class specification
     *
     * @return  A formatted string
     *
     * @see  java.util.Formatter
     * @since  1.5
     */
2787
    public static String format(Locale l, String format, Object... args) {
D
duke 已提交
2788 2789 2790 2791
        return new Formatter(l).format(format, args).toString();
    }

    /**
     * Produces the string form of an arbitrary {@code Object}.
     *
     * @param   obj   an {@code Object}, possibly {@code null}.
     * @return  a string equal to {@code "null"} when the argument is
     *          {@code null}; otherwise whatever {@code obj.toString()}
     *          returns.
     * @see     java.lang.Object#toString()
     */
    public static String valueOf(Object obj) {
        if (obj == null) {
            return "null";
        }
        return obj.toString();
    }

    /**
     * Builds a string from the contents of a {@code char} array. The
     * characters are copied, so changing the array afterwards has no effect
     * on the returned string.
     *
     * @param   data     the character array.
     * @return  a {@code String} that contains the characters of the
     *          character array.
     */
    public static String valueOf(char data[]) {
        final String copy = new String(data);
        return copy;
    }

    /**
     * Builds a string from a slice of a {@code char} array.
     * <p>
     * The slice begins at index {@code offset} and spans {@code count}
     * characters. The characters of the slice are copied, so changing the
     * array afterwards has no effect on the returned string.
     *
     * @param   data     the character array.
     * @param   offset   initial offset of the subarray.
     * @param   count    length of the subarray.
     * @return  a {@code String} that contains the characters of the
     *          specified subarray of the character array.
     * @throws  IndexOutOfBoundsException if {@code offset} is
     *          negative, or {@code count} is negative, or
     *          {@code offset+count} is larger than
     *          {@code data.length}.
     */
    public static String valueOf(char data[], int offset, int count) {
        final String slice = new String(data, offset, count);
        return slice;
    }

    /**
     * Same operation as {@link #valueOf(char[], int, int)}.
     *
     * @param   data     the character array.
     * @param   offset   initial offset of the subarray.
     * @param   count    length of the subarray.
     * @return  a {@code String} that contains the characters of the
     *          specified subarray of the character array.
     * @throws  IndexOutOfBoundsException if {@code offset} is
     *          negative, or {@code count} is negative, or
     *          {@code offset+count} is larger than
     *          {@code data.length}.
     */
    public static String copyValueOf(char data[], int offset, int count) {
        final String slice = new String(data, offset, count);
        return slice;
    }

    /**
2860
     * Equivalent to {@link #valueOf(char[])}.
D
duke 已提交
2861 2862
     *
     * @param   data   the character array.
2863
     * @return  a {@code String} that contains the characters of the
D
duke 已提交
2864 2865 2866
     *          character array.
     */
    public static String copyValueOf(char data[]) {
2867
        return new String(data);
D
duke 已提交
2868 2869 2870
    }

    /**
     * Produces the string form of a {@code boolean}.
     *
     * @param   b   a {@code boolean}.
     * @return  a string equal to {@code "true"} when the argument is
     *          {@code true}; otherwise a string equal to {@code "false"}.
     */
    public static String valueOf(boolean b) {
        if (b) {
            return "true";
        }
        return "false";
    }

    /**
2883
     * Returns the string representation of the {@code char}
D
duke 已提交
2884 2885
     * argument.
     *
2886 2887 2888
     * @param   c   a {@code char}.
     * @return  a string of length {@code 1} containing
     *          as its single character the argument {@code c}.
D
duke 已提交
2889 2890
     */
    public static String valueOf(char c) {
T
thartmann 已提交
2891 2892 2893 2894
        if (COMPACT_STRINGS && StringLatin1.canEncode(c)) {
            return new String(StringLatin1.toBytes(c), LATIN1);
        }
        return new String(StringUTF16.toBytes(c), UTF16);
D
duke 已提交
2895 2896 2897
    }

    /**
2898
     * Returns the string representation of the {@code int} argument.
D
duke 已提交
2899 2900
     * <p>
     * The representation is exactly the one returned by the
2901
     * {@code Integer.toString} method of one argument.
D
duke 已提交
2902
     *
2903 2904
     * @param   i   an {@code int}.
     * @return  a string representation of the {@code int} argument.
D
duke 已提交
2905 2906 2907
     * @see     java.lang.Integer#toString(int, int)
     */
    public static String valueOf(int i) {
2908
        return Integer.toString(i);
D
duke 已提交
2909 2910 2911
    }

    /**
2912
     * Returns the string representation of the {@code long} argument.
D
duke 已提交
2913 2914
     * <p>
     * The representation is exactly the one returned by the
2915
     * {@code Long.toString} method of one argument.
D
duke 已提交
2916
     *
2917 2918
     * @param   l   a {@code long}.
     * @return  a string representation of the {@code long} argument.
D
duke 已提交
2919 2920 2921
     * @see     java.lang.Long#toString(long)
     */
    public static String valueOf(long l) {
2922
        return Long.toString(l);
D
duke 已提交
2923 2924 2925
    }

    /**
     * Produces the string form of a {@code float}.
     * <p>
     * The result is exactly what the one-argument
     * {@code Float.toString} method returns.
     *
     * @param   f   a {@code float}.
     * @return  a string representation of the {@code float} argument.
     * @see     java.lang.Float#toString(float)
     */
    public static String valueOf(float f) {
        final String text = Float.toString(f);
        return text;
    }

    /**
     * Produces the string form of a {@code double}.
     * <p>
     * The result is exactly what the one-argument
     * {@code Double.toString} method returns.
     *
     * @param   d   a {@code double}.
     * @return  a string representation of the {@code double} argument.
     * @see     java.lang.Double#toString(double)
     */
    public static String valueOf(double d) {
        final String text = Double.toString(d);
        return text;
    }

    /**
     * Returns a canonical representation for the string object.
     * <p>
     * A pool of strings, initially empty, is maintained privately by the
     * class {@code String}.
     * <p>
     * When the intern method is invoked, if the pool already contains a
     * string equal to this {@code String} object as determined by
     * the {@link #equals(Object)} method, then the string from the pool is
     * returned. Otherwise, this {@code String} object is added to the
     * pool and a reference to this {@code String} object is returned.
     * <p>
     * It follows that for any two strings {@code s} and {@code t},
     * {@code s.intern() == t.intern()} is {@code true}
     * if and only if {@code s.equals(t)} is {@code true}.
     * <p>
     * All literal strings and string-valued constant expressions are
     * interned. String literals are defined in section 3.10.5 of
     * <cite>The Java&trade; Language Specification</cite>.
     *
     * @return  a string that has the same contents as this string, but is
     *          guaranteed to be from a pool of unique strings.
     * @jls 3.10.5 String Literals
     */
    public native String intern();
T
thartmann 已提交
2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065

    ////////////////////////////////////////////////////////////////

    /**
     * Writes this string's character bytes into {@code dst}, beginning at
     * char index {@code dstBegin}. No range checking is done here.
     *
     * The caller guarantees that {@code dst} is UTF16 whenever the two
     * coders differ (an AbstractStringBuilder inflates itself first), and
     * that {@code dst} is large enough.
     *
     * @param dstBegin  the char index, not offset of byte[]
     * @param coder     the coder of dst[]
     */
    void getBytes(byte dst[], int dstBegin, byte coder) {
        final byte[] src = value;
        if (coder() != coder) {
            // this.coder == LATIN1 && coder == UTF16: widen each byte.
            StringLatin1.inflate(src, 0, dst, dstBegin, src.length);
            return;
        }
        // Same encoding: raw byte copy; the shift turns the char index into
        // a byte offset.
        System.arraycopy(src, 0, dst, dstBegin << coder, src.length);
    }

    /*
     * Package private constructor. The trailing Void argument exists only to
     * disambiguate it from the other (public) constructors.
     *
     * Stores the char[] value into a byte[] in which each byte holds the
     * 8 low-order bits of the corresponding character, if the char[]
     * contains only latin1 characters. Otherwise stores all characters
     * in the byte sequences defined by {@code StringUTF16}.
     */
    String(char[] value, int off, int len, Void sig) {
        if (len == 0) {
            // Share the empty string's backing array and coder.
            this.value = "".value;
            this.coder = "".coder;
            return;
        }
        // Compression is attempted only when compact strings are enabled;
        // a null result means the range is not representable as LATIN1.
        byte[] compressed = COMPACT_STRINGS
                ? StringUTF16.compress(value, off, len)
                : null;
        if (compressed != null) {
            this.value = compressed;
            this.coder = LATIN1;
        } else {
            this.value = StringUTF16.toBytes(value, off, len);
            this.coder = UTF16;
        }
    }

    /*
     * Package private constructor that snapshots the contents of an
     * AbstractStringBuilder. The trailing Void argument exists only to
     * disambiguate it from the other (public) constructors.
     */
    String(AbstractStringBuilder asb, Void sig) {
        final byte[] src = asb.getValue();
        final int count = asb.length();
        if (asb.isLatin1()) {
            // One byte per char: copy exactly count bytes.
            this.coder = LATIN1;
            this.value = Arrays.copyOfRange(src, 0, count);
            return;
        }
        // The builder is UTF16; try to shrink to LATIN1 when compact
        // strings are enabled.
        byte[] compressed = COMPACT_STRINGS
                ? StringUTF16.compress(src, 0, count)
                : null;
        if (compressed != null) {
            this.coder = LATIN1;
            this.value = compressed;
        } else {
            // Two bytes per char: copy count * 2 bytes.
            this.coder = UTF16;
            this.value = Arrays.copyOfRange(src, 0, count << 1);
        }
    }

   /*
    * Package private constructor that adopts the given array without
    * copying it (shared for speed), together with its coder.
    */
    String(byte[] value, byte coder) {
        this.coder = coder;
        this.value = value;
    }

    /*
     * Returns the stored coder when COMPACT_STRINGS is set, and UTF16
     * otherwise.
     */
    byte coder() {
        if (!COMPACT_STRINGS) {
            return UTF16;
        }
        return coder;
    }

    /*
     * True only when COMPACT_STRINGS is set and the stored coder is LATIN1.
     */
    private boolean isLatin1() {
        if (!COMPACT_STRINGS) {
            return false;
        }
        return coder == LATIN1;
    }

3066 3067
    /*
     * Coder values. Besides being compared against the coder field, the
     * numeric value is also used as a shift amount when a char index is
     * turned into a byte offset (index << coder), so LATIN1 must stay 0 and
     * UTF16 must stay 1.
     */
    @Native static final byte LATIN1 = 0;
    @Native static final byte UTF16  = 1;
T
thartmann 已提交
3068 3069 3070 3071 3072 3073 3074

    /*
     * StringIndexOutOfBoundsException  if {@code index} is
     * negative or greater than or equal to {@code length}.
     */
    static void checkIndex(int index, int length) {
        if (index < 0 || index >= length) {
3075 3076
            throw new StringIndexOutOfBoundsException("index " + index +
                                                      ",length " + length);
T
thartmann 已提交
3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098
        }
    }

    /*
     * Checks that {@code offset} is a valid position in a sequence holding
     * {@code length} elements; the position just past the last element
     * ({@code offset == length}) is accepted.
     *
     * @throws  StringIndexOutOfBoundsException
     *          If {@code offset} is negative or greater than {@code length}.
     */
    static void checkOffset(int offset, int length) {
        if (offset < 0 || offset > length) {
            throw new StringIndexOutOfBoundsException("offset " + offset +
                                                      ", length " + length);
        }
    }

    /*
     * Check {@code offset}, {@code count} against {@code 0} and {@code length}
     * bounds.
     *
     * @throws  StringIndexOutOfBoundsException
     *          If {@code offset} is negative, {@code count} is negative,
     *          or {@code offset} is greater than {@code length - count}
     */
3099
    static void checkBoundsOffCount(int offset, int count, int length) {
T
thartmann 已提交
3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113
        if (offset < 0 || count < 0 || offset > length - count) {
            throw new StringIndexOutOfBoundsException(
                "offset " + offset + ", count " + count + ", length " + length);
        }
    }

    /*
     * Check {@code begin}, {@code end} against {@code 0} and {@code length}
     * bounds.
     *
     * @throws  StringIndexOutOfBoundsException
     *          If {@code begin} is negative, {@code begin} is greater than
     *          {@code end}, or {@code end} is greater than {@code length}.
     */
3114
    static void checkBoundsBeginEnd(int begin, int end, int length) {
T
thartmann 已提交
3115 3116 3117 3118 3119
        if (begin < 0 || begin > end || end > length) {
            throw new StringIndexOutOfBoundsException(
                "begin " + begin + ", end " + end + ", length " + length);
        }
    }
D
duke 已提交
3120
}