package java.lang;
import java.io.ObjectStreamField;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
- The <code>String</code> class represents character strings.
- JAVA程序中的所有的string字符乙帮,比如"abc",是作為String類(lèi)的實(shí)例實(shí)現(xiàn)的极景。
- Strings 是常量;其值在創(chuàng)建后不能在有改變察净。String buffers支持不可變字符串。
- 因?yàn)镾tring 對(duì)象是不可變的盼樟,因此他們可以被共享氢卡。比如:
String str = "abc";
- 相當(dāng)于
char data[] = {'a', 'b', 'c'};
String str = new String(data);
- 下面是更多的使用用例:
System.out.println("abc");
String cde = "cde";
System.out.println("abc" + cde);
String c = "abc".substring(2,3);
String d = cde.substring(1, 2);
String類(lèi)包括檢驗(yàn)單個(gè)字符的序列的方法,用于字符串比較晨缴、字符串搜索译秦、提取子字符串,以及將字符串轉(zhuǎn)化為大寫(xiě)或小寫(xiě)的字符串副本击碗。
Java語(yǔ)言為字符串連接操作符(+)提供了特殊支持筑悴,以及支持將其他對(duì)象轉(zhuǎn)化為字符串。字符串連接是由StringBuffer類(lèi)和它的append方法實(shí)現(xiàn)的稍途。
toString()方法:由Object定義阁吝,被Java中的所有類(lèi)繼承。
Java語(yǔ)言規(guī)范
除非傳入構(gòu)造器的參數(shù)為null晰房,或者該類(lèi)中的方法會(huì)導(dǎo)致NullPointerException的拋出求摇。
索引值表示代碼單元射沟,補(bǔ)充字符在字符串中占用了兩個(gè)位置殊者。String類(lèi)除了處理Unicode代碼單元的方法外,也為處理Unicode守則要點(diǎn)提供了方法
public final class String
implements java.io.Serializable, Comparable<String>, CharSequence {
private final char value[]; /** 用于存儲(chǔ)char字符 */
private int hash; /** 為字符串緩存哈希值 */
// 默認(rèn)為值0
private static final long serialVersionUID = -6849794470754667710L; /**使用JDK 1.0.2中的UID*/
/**
* String類(lèi)是序列化流協(xié)議的特殊包中的验夯。
* 一個(gè)字符串實(shí)例最初被寫(xiě)成ObjectOutputStream格式的猖吴,具體如下:
*/
private static final ObjectStreamField[] serialPersistentFields =
new ObjectStreamField[0];
* 初始化一個(gè)新建的對(duì)象,該對(duì)象表示一個(gè)空子序列挥转。由于String是不可變的海蔽,因此其構(gòu)造器不是必須的。
public String() {
this.value = new char[0];
}
* 初始化一個(gè)新建的對(duì)象來(lái)表示相同的字符序列作為參數(shù)绑谣,換句話說(shuō)党窜,新建的字符串是參數(shù)字符串的拷貝。除非顯式拷貝是必要的借宵,該構(gòu)造器的使用是無(wú)用的幌衣。
public String(String original) {
this.value = original.value;
this.hash = original.hash;
}
-
分配一個(gè)新的字符串用于表示字符數(shù)組參數(shù)包含的字符序列。拷貝字符數(shù)組的內(nèi)容豁护;字符數(shù)組的后續(xù)修改不會(huì)影響新建的字符串哼凯。
public String(char value[]) { this.value = Arrays.copyOf(value, value.length); }
分配一個(gè)新數(shù)組用于保存字符數(shù)組參數(shù)的子數(shù)組的內(nèi)容。參數(shù)為 子數(shù)組的第一個(gè)字符的索引楚里,并且參數(shù)count描述了子數(shù)組的長(zhǎng)度断部。拷貝子數(shù)組的內(nèi)容班缎;字符數(shù)組的后續(xù)修改不影響新建的字符串蝴光。
@param value :源字符數(shù)組
@param offset :初始位移
@param count :字符長(zhǎng)度
@throws IndexOutOfBoundsException : 如果參數(shù)offset和count的索引字符超出了數(shù)組界限,即拋出異常
public String(char value[], int offset, int count) { if (offset < 0) { throw new StringIndexOutOfBoundsException(offset); } if (count < 0) { throw new StringIndexOutOfBoundsException(count); } // Note: offset or count might be near -1>>>1. if (offset > value.length - count) { throw new StringIndexOutOfBoundsException(offset + count); } this.value = Arrays.copyOfRange(value, offset, offset+count); }
* 分配一個(gè)新的字符串用于保存參數(shù)(Unicode code point)的子數(shù)組中的字符达址。
參數(shù)offset是子數(shù)組的第一個(gè)code point的索引值虱疏,參數(shù)count描述了子數(shù)組的長(zhǎng)度。子數(shù)組的內(nèi)容被轉(zhuǎn)換為char數(shù)組苏携,字符數(shù)組的后續(xù)修改不影響新建的字符串做瞪。
* @param codePoints :Unicode code points 的源數(shù)組
* @param offset :初始位移
* @param count :源數(shù)組的長(zhǎng)度
* @throws IllegalArgumentException :
如果 codePoints中存在任何無(wú)用的Unicode code poin,觸發(fā)該異常}
* @throws IndexOutOfBoundsException :
如果參數(shù)offset和count的索引字符超出了數(shù)組界限右冻,即拋出異常
public String(int[] codePoints, int offset, int count) {
if (offset < 0) {
throw new StringIndexOutOfBoundsException(offset);
}
if (count < 0) {
throw new StringIndexOutOfBoundsException(count);
}
// Note: offset or count might be near -1>>>1.
if (offset > codePoints.length - count) {
throw new StringIndexOutOfBoundsException(offset + count);
}
final int end = offset + count;
// Pass 1: 計(jì)算 char[]的精確長(zhǎng)度
int n = count;
for (int i = offset; i < end; i++) {
int c = codePoints[i];
if (Character.isBmpCodePoint(c))
continue;
else if (Character.isValidCodePoint(c))
n++;
else throw new IllegalArgumentException(Integer.toString(c));
}
// Pass 2: 分配以及填充char[]
final char[] v = new char[n];
for (int i = offset, j = 0; i < end; i++, j++) {
int c = codePoints[i];
if (Character.isBmpCodePoint(c))
v[j] = (char)c;
else
Character.toSurrogates(c, v, j++);
}
this.value = v;
}
* 分配一個(gè)新的字符串装蓬,由8位整型值的數(shù)組的子數(shù)組構(gòu)造
* 參數(shù)offset是子數(shù)組第一個(gè)字節(jié)的索引,參數(shù)count描述了子數(shù)組的長(zhǎng)度纱扭。
* 子數(shù)組的每個(gè)字節(jié)被轉(zhuǎn)化為char牍帚,如上面描述的方法
* @deprecated 方法并不能將字節(jié)轉(zhuǎn)化為字符。
* 在 JDK 1.1中, 更側(cè)重于使用String構(gòu)造器來(lái)轉(zhuǎn)化乳蛾,字符集名稱(chēng)或平臺(tái)默認(rèn)的字符集
* @param ascii :要被轉(zhuǎn)化為字符的字節(jié)
* @param hibyte :每個(gè)16比特的Unicode code unit 的前8比特
* @param offset :初始位移
* @param count :字節(jié)長(zhǎng)度
* @throws IndexOutOfBoundsException :參數(shù)offset和count的值是無(wú)用的
* @see #String(byte[], int)
* @see #String(byte[], int, int, java.lang.String)
* @see #String(byte[], int, int, java.nio.charset.Charset)
* @see #String(byte[], int, int)
* @see #String(byte[], java.lang.String)
* @see #String(byte[], java.nio.charset.Charset)
* @see #String(byte[])
*/
@Deprecated
public String(byte ascii[], int hibyte, int offset, int count) {
checkBounds(ascii, offset, count);
char value[] = new char[count];
if (hibyte == 0) {
for (int i = count; i-- > 0;) {
value[i] = (char)(ascii[i + offset] & 0xff);
}
} else {
hibyte <<= 8;
for (int i = count; i-- > 0;) {
value[i] = (char)(hibyte | (ascii[i + offset] & 0xff));
}
}
this.value = value;
}
/**
* Allocates a new {@code String} containing characters constructed from
* an array of 8-bit integer values. Each character <i>c</i>in the
* resulting string is constructed from the corresponding component
* <i>b</i> in the byte array such that:
*
* <blockquote><pre>
* <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8)
* | (<b><i>b</i></b> & 0xff))
* </pre></blockquote>
*
* @deprecated This method does not properly convert bytes into
* characters. As of JDK 1.1, the preferred way to do this is via the
* {@code String} constructors that take a {@link
* java.nio.charset.Charset}, charset name, or that use the platform's
* default charset.
*
* @param ascii
* The bytes to be converted to characters
*
* @param hibyte
* The top 8 bits of each 16-bit Unicode code unit
*
* @see #String(byte[], int, int, java.lang.String)
* @see #String(byte[], int, int, java.nio.charset.Charset)
* @see #String(byte[], int, int)
* @see #String(byte[], java.lang.String)
* @see #String(byte[], java.nio.charset.Charset)
* @see #String(byte[])
*/
@Deprecated
public String(byte ascii[], int hibyte) {
this(ascii, hibyte, 0, ascii.length);
}
/* Common private utility method used to bounds check the byte array
* and requested offset & length values used by the String(byte[],..)
* constructors.
*/
private static void checkBounds(byte[] bytes, int offset, int length) {
if (length < 0)
throw new StringIndexOutOfBoundsException(length);
if (offset < 0)
throw new StringIndexOutOfBoundsException(offset);
if (offset > bytes.length - length)
throw new StringIndexOutOfBoundsException(offset + length);
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the specified charset. The length of the new {@code String}
* is a function of the charset, and hence may not be equal to the length
* of the subarray.
*
* <p> The behavior of this constructor when the given bytes are not valid
* in the given charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since JDK1.1
*/
public String(byte bytes[], int offset, int length, String charsetName)
throws UnsupportedEncodingException {
if (charsetName == null)
throw new NullPointerException("charsetName");
checkBounds(bytes, offset, length);
this.value = StringCoding.decode(charsetName, bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.
* The length of the new {@code String} is a function of the charset, and
* hence may not be equal to the length of the subarray.
*
* <p> This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement string. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
*
* @param charset
* The {@linkplain java.nio.charset.Charset charset} to be used to
* decode the {@code bytes}
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since 1.6
*/
public String(byte bytes[], int offset, int length, Charset charset) {
if (charset == null)
throw new NullPointerException("charset");
checkBounds(bytes, offset, length);
this.value = StringCoding.decode(charset, bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the specified {@linkplain java.nio.charset.Charset charset}. The
* length of the new {@code String} is a function of the charset, and hence
* may not be equal to the length of the byte array.
*
* <p> The behavior of this constructor when the given bytes are not valid
* in the given charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @since JDK1.1
*/
public String(byte bytes[], String charsetName)
throws UnsupportedEncodingException {
this(bytes, 0, bytes.length, charsetName);
}
/**
* Constructs a new {@code String} by decoding the specified array of
* bytes using the specified {@linkplain java.nio.charset.Charset charset}.
* The length of the new {@code String} is a function of the charset, and
* hence may not be equal to the length of the byte array.
*
* <p> This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement string. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param charset
* The {@linkplain java.nio.charset.Charset charset} to be used to
* decode the {@code bytes}
*
* @since 1.6
*/
public String(byte bytes[], Charset charset) {
this(bytes, 0, bytes.length, charset);
}
/**
* Constructs a new {@code String} by decoding the specified subarray of
* bytes using the platform's default charset. The length of the new
* {@code String} is a function of the charset, and hence may not be equal
* to the length of the subarray.
*
* <p> The behavior of this constructor when the given bytes are not valid
* in the default charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @param offset
* The index of the first byte to decode
*
* @param length
* The number of bytes to decode
*
* @throws IndexOutOfBoundsException
* If the {@code offset} and the {@code length} arguments index
* characters outside the bounds of the {@code bytes} array
*
* @since JDK1.1
*/
public String(byte bytes[], int offset, int length) {
checkBounds(bytes, offset, length);
this.value = StringCoding.decode(bytes, offset, length);
}
/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the platform's default charset. The length of the new {@code
* String} is a function of the charset, and hence may not be equal to the
* length of the byte array.
*
* <p> The behavior of this constructor when the given bytes are not valid
* in the default charset is unspecified. The {@link
* java.nio.charset.CharsetDecoder} class should be used when more control
* over the decoding process is required.
*
* @param bytes
* The bytes to be decoded into characters
*
* @since JDK1.1
*/
public String(byte bytes[]) {
this(bytes, 0, bytes.length);
}
/**
* Allocates a new string that contains the sequence of characters
* currently contained in the string buffer argument. The contents of the
* string buffer are copied; subsequent modification of the string buffer
* does not affect the newly created string.
*
* @param buffer
* A {@code StringBuffer}
*/
public String(StringBuffer buffer) {
synchronized(buffer) {
this.value = Arrays.copyOf(buffer.getValue(), buffer.length());
}
}
/**
* Allocates a new string that contains the sequence of characters
* currently contained in the string builder argument. The contents of the
* string builder are copied; subsequent modification of the string builder
* does not affect the newly created string.
*
* <p> This constructor is provided to ease migration to {@code
* StringBuilder}. Obtaining a string from a string builder via the {@code
* toString} method is likely to run faster and is generally preferred.
*
* @param builder
* A {@code StringBuilder}
*
* @since 1.5
*/
public String(StringBuilder builder) {
this.value = Arrays.copyOf(builder.getValue(), builder.length());
}
/*
* Package private constructor which shares value array for speed.
* this constructor is always expected to be called with share==true.
* a separate constructor is needed because we already have a public
* String(char[]) constructor that makes a copy of the given char[].
*/
String(char[] value, boolean share) {
// assert share : "unshared not supported";
this.value = value;
}
/**
* Package private constructor
*
* @deprecated Use {@link #String(char[],int,int)} instead.
*/
@Deprecated
String(int offset, int count, char[] value) {
this(value, offset, count);
}
/**
* Returns the length of this string.
* The length is equal to the number of <a href="Character.html#unicode">Unicode
* code units</a> in the string.
*
* @return the length of the sequence of characters represented by this
* object.
*/
public int length() {
return value.length;
}
/**
* Returns <tt>true</tt> if, and only if, {@link #length()} is <tt>0</tt>.
*
* @return <tt>true</tt> if {@link #length()} is <tt>0</tt>, otherwise
* <tt>false</tt>
*
* @since 1.6
*/
public boolean isEmpty() {
return value.length == 0;
}
/**
* Returns the <code>char</code> value at the
* specified index. An index ranges from <code>0</code> to
* <code>length() - 1</code>. The first <code>char</code> value of the sequence
* is at index <code>0</code>, the next at index <code>1</code>,
* and so on, as for array indexing.
*
* <p>If the <code>char</code> value specified by the index is a
* <a href="Character.html#unicode">surrogate</a>, the surrogate
* value is returned.
*
* @param index the index of the <code>char</code> value.
* @return the <code>char</code> value at the specified index of this string.
* The first <code>char</code> value is at index <code>0</code>.
* @exception IndexOutOfBoundsException if the <code>index</code>
* argument is negative or not less than the length of this
* string.
*/
public char charAt(int index) {
if ((index < 0) || (index >= value.length)) {
throw new StringIndexOutOfBoundsException(index);
}
return value[index];
}
/**
* Returns the character (Unicode code point) at the specified
* index. The index refers to <code>char</code> values
* (Unicode code units) and ranges from <code>0</code> to
* {@link #length()}<code> - 1</code>.
*
* <p> If the <code>char</code> value specified at the given index
* is in the high-surrogate range, the following index is less
* than the length of this <code>String</code>, and the
* <code>char</code> value at the following index is in the
* low-surrogate range, then the supplementary code point
* corresponding to this surrogate pair is returned. Otherwise,
* the <code>char</code> value at the given index is returned.
*
* @param index the index to the <code>char</code> values
* @return the code point value of the character at the
* <code>index</code>
* @exception IndexOutOfBoundsException if the <code>index</code>
* argument is negative or not less than the length of this
* string.
* @since 1.5
*/
public int codePointAt(int index) {
if ((index < 0) || (index >= value.length)) {
throw new StringIndexOutOfBoundsException(index);
}
return Character.codePointAtImpl(value, index, value.length);
}
/**
* Returns the character (Unicode code point) before the specified
* index. The index refers to <code>char</code> values
* (Unicode code units) and ranges from <code>1</code> to {@link
* CharSequence#length() length}.
*
* <p> If the <code>char</code> value at <code>(index - 1)</code>
* is in the low-surrogate range, <code>(index - 2)</code> is not
* negative, and the <code>char</code> value at <code>(index -
* 2)</code> is in the high-surrogate range, then the
* supplementary code point value of the surrogate pair is
* returned. If the <code>char</code> value at <code>index -
* 1</code> is an unpaired low-surrogate or a high-surrogate, the
* surrogate value is returned.
*
* @param index the index following the code point that should be returned
* @return the Unicode code point value before the given index.
* @exception IndexOutOfBoundsException if the <code>index</code>
* argument is less than 1 or greater than the length
* of this string.
* @since 1.5
*/
public int codePointBefore(int index) {
int i = index - 1;
if ((i < 0) || (i >= value.length)) {
throw new StringIndexOutOfBoundsException(index);
}
return Character.codePointBeforeImpl(value, index, 0);
}
/**
* Returns the number of Unicode code points in the specified text
* range of this <code>String</code>. The text range begins at the
* specified <code>beginIndex</code> and extends to the
* <code>char</code> at index <code>endIndex - 1</code>. Thus the
* length (in <code>char</code>s) of the text range is
* <code>endIndex-beginIndex</code>. Unpaired surrogates within
* the text range count as one code point each.
*
* @param beginIndex the index to the first <code>char</code> of
* the text range.
* @param endIndex the index after the last <code>char</code> of
* the text range.
* @return the number of Unicode code points in the specified text
* range
* @exception IndexOutOfBoundsException if the
* <code>beginIndex</code> is negative, or <code>endIndex</code>
* is larger than the length of this <code>String</code>, or
* <code>beginIndex</code> is larger than <code>endIndex</code>.
* @since 1.5
*/
public int codePointCount(int beginIndex, int endIndex) {
if (beginIndex < 0 || endIndex > value.length || beginIndex > endIndex) {
throw new IndexOutOfBoundsException();
}
return Character.codePointCountImpl(value, beginIndex, endIndex - beginIndex);
}
/**
* Returns the index within this <code>String</code> that is
* offset from the given <code>index</code> by
* <code>codePointOffset</code> code points. Unpaired surrogates
* within the text range given by <code>index</code> and
* <code>codePointOffset</code> count as one code point each.
*
* @param index the index to be offset
* @param codePointOffset the offset in code points
* @return the index within this <code>String</code>
* @exception IndexOutOfBoundsException if <code>index</code>
* is negative or larger then the length of this
* <code>String</code>, or if <code>codePointOffset</code> is positive
* and the substring starting with <code>index</code> has fewer
* than <code>codePointOffset</code> code points,
* or if <code>codePointOffset</code> is negative and the substring
* before <code>index</code> has fewer than the absolute value
* of <code>codePointOffset</code> code points.
* @since 1.5
*/
public int offsetByCodePoints(int index, int codePointOffset) {
if (index < 0 || index > value.length) {
throw new IndexOutOfBoundsException();
}
return Character.offsetByCodePointsImpl(value, 0, value.length,
index, codePointOffset);
}
/**
* Copy characters from this string into dst starting at dstBegin.
* This method doesn't perform any range checking.
*/
void getChars(char dst[], int dstBegin) {
System.arraycopy(value, 0, dst, dstBegin, value.length);
}
/**
* Copies characters from this string into the destination character
* array.
* <p>
* The first character to be copied is at index <code>srcBegin</code>;
* the last character to be copied is at index <code>srcEnd-1</code>
* (thus the total number of characters to be copied is
* <code>srcEnd-srcBegin</code>). The characters are copied into the
* subarray of <code>dst</code> starting at index <code>dstBegin</code>
* and ending at index:
* <p><blockquote><pre>
* dstbegin + (srcEnd-srcBegin) - 1
* </pre></blockquote>
*
* @param srcBegin index of the first character in the string
* to copy.
* @param srcEnd index after the last character in the string
* to copy.
* @param dst the destination array.
* @param dstBegin the start offset in the destination array.
* @exception IndexOutOfBoundsException If any of the following
* is true:
* <ul><li><code>srcBegin</code> is negative.
* <li><code>srcBegin</code> is greater than <code>srcEnd</code>
* <li><code>srcEnd</code> is greater than the length of this
* string
* <li><code>dstBegin</code> is negative
* <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
* <code>dst.length</code></ul>
*/
public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) {
if (srcBegin < 0) {
throw new StringIndexOutOfBoundsException(srcBegin);
}
if (srcEnd > value.length) {
throw new StringIndexOutOfBoundsException(srcEnd);
}
if (srcBegin > srcEnd) {
throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
}
System.arraycopy(value, srcBegin, dst, dstBegin, srcEnd - srcBegin);
}
/**
* Copies characters from this string into the destination byte array. Each
* byte receives the 8 low-order bits of the corresponding character. The
* eight high-order bits of each character are not copied and do not
* participate in the transfer in any way.
*
* <p> The first character to be copied is at index {@code srcBegin}; the
* last character to be copied is at index {@code srcEnd-1}. The total
* number of characters to be copied is {@code srcEnd-srcBegin}. The
* characters, converted to bytes, are copied into the subarray of {@code
* dst} starting at index {@code dstBegin} and ending at index:
*
* <blockquote><pre>
* dstbegin + (srcEnd-srcBegin) - 1
* </pre></blockquote>
*
* @deprecated This method does not properly convert characters into
* bytes. As of JDK 1.1, the preferred way to do this is via the
* {@link #getBytes()} method, which uses the platform's default charset.
*
* @param srcBegin
* Index of the first character in the string to copy
*
* @param srcEnd
* Index after the last character in the string to copy
*
* @param dst
* The destination array
*
* @param dstBegin
* The start offset in the destination array
*
* @throws IndexOutOfBoundsException
* If any of the following is true:
* <ul>
* <li> {@code srcBegin} is negative
* <li> {@code srcBegin} is greater than {@code srcEnd}
* <li> {@code srcEnd} is greater than the length of this String
* <li> {@code dstBegin} is negative
* <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code
* dst.length}
* </ul>
*/
@Deprecated
public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
if (srcBegin < 0) {
throw new StringIndexOutOfBoundsException(srcBegin);
}
if (srcEnd > value.length) {
throw new StringIndexOutOfBoundsException(srcEnd);
}
if (srcBegin > srcEnd) {
throw new StringIndexOutOfBoundsException(srcEnd - srcBegin);
}
int j = dstBegin;
int n = srcEnd;
int i = srcBegin;
char[] val = value; /* avoid getfield opcode */
while (i < n) {
dst[j++] = (byte)val[i++];
}
}
/**
* Encodes this {@code String} into a sequence of bytes using the named
* charset, storing the result into a new byte array.
*
* <p> The behavior of this method when this string cannot be encoded in
* the given charset is unspecified. The {@link
* java.nio.charset.CharsetEncoder} class should be used when more control
* over the encoding process is required.
*
* @param charsetName
* The name of a supported {@linkplain java.nio.charset.Charset
* charset}
*
* @return The resultant byte array
*
* @throws UnsupportedEncodingException
* If the named charset is not supported
*
* @since JDK1.1
*/
public byte[] getBytes(String charsetName)
throws UnsupportedEncodingException {
if (charsetName == null) throw new NullPointerException();
return StringCoding.encode(charsetName, value, 0, value.length);
}
/**
* Encodes this {@code String} into a sequence of bytes using the given
* {@linkplain java.nio.charset.Charset charset}, storing the result into a
* new byte array.
*
* <p> This method always replaces malformed-input and unmappable-character
* sequences with this charset's default replacement byte array. The
* {@link java.nio.charset.CharsetEncoder} class should be used when more
* control over the encoding process is required.
*
* @param charset
* The {@linkplain java.nio.charset.Charset} to be used to encode
* the {@code String}
*
* @return The resultant byte array
*
* @since 1.6
*/
public byte[] getBytes(Charset charset) {
if (charset == null) throw new NullPointerException();
return StringCoding.encode(charset, value, 0, value.length);
}
/**
* Encodes this {@code String} into a sequence of bytes using the
* platform's default charset, storing the result into a new byte array.
*
* <p> The behavior of this method when this string cannot be encoded in
* the default charset is unspecified. The {@link
* java.nio.charset.CharsetEncoder} class should be used when more control
* over the encoding process is required.
*
* @return The resultant byte array
*
* @since JDK1.1
*/
public byte[] getBytes() {
return StringCoding.encode(value, 0, value.length);
}
/**
* Compares this string to the specified object. The result is {@code
* true} if and only if the argument is not {@code null} and is a {@code
* String} object that represents the same sequence of characters as this
* object.
*
* @param anObject
* The object to compare this {@code String} against
*
* @return {@code true} if the given object represents a {@code String}
* equivalent to this string, {@code false} otherwise
*
* @see #compareTo(String)
* @see #equalsIgnoreCase(String)
*/
public boolean equals(Object anObject) {
if (this == anObject) {
return true;
}
if (anObject instanceof String) {
String anotherString = (String) anObject;
int n = value.length;
if (n == anotherString.value.length) {
char v1[] = value;
char v2[] = anotherString.value;
int i = 0;
while (n-- != 0) {
if (v1[i] != v2[i])
return false;
i++;
}
return true;
}
}
return false;
}
/**
* Compares this string to the specified {@code StringBuffer}. The result
* is {@code true} if and only if this {@code String} represents the same
* sequence of characters as the specified {@code StringBuffer}.
*
* @param sb
* The {@code StringBuffer} to compare this {@code String} against
*
* @return {@code true} if this {@code String} represents the same
* sequence of characters as the specified {@code StringBuffer},
* {@code false} otherwise
*
* @since 1.4
*/
public boolean contentEquals(StringBuffer sb) {
synchronized (sb) {
return contentEquals((CharSequence) sb);
}
}
/**
* Compares this string to the specified {@code CharSequence}. The result
* is {@code true} if and only if this {@code String} represents the same
* sequence of char values as the specified sequence.
*
* @param cs
* The sequence to compare this {@code String} against
*
* @return {@code true} if this {@code String} represents the same
* sequence of char values as the specified sequence, {@code
* false} otherwise
*
* @since 1.5
*/
public boolean contentEquals(CharSequence cs) {
if (value.length != cs.length())
return false;
// Argument is a StringBuffer, StringBuilder
if (cs instanceof AbstractStringBuilder) {
char v1[] = value;
char v2[] = ((AbstractStringBuilder) cs).getValue();
int i = 0;
int n = value.length;
while (n-- != 0) {
if (v1[i] != v2[i])
return false;
i++;
}
return true;
}
// Argument is a String
if (cs.equals(this))
return true;
// Argument is a generic CharSequence
char v1[] = value;
int i = 0;
int n = value.length;
while (n-- != 0) {
if (v1[i] != cs.charAt(i))
return false;
i++;
}
return true;
}
/**
* Compares this {@code String} to another {@code String}, ignoring case
* considerations. Two strings are considered equal ignoring case if they
* are of the same length and corresponding characters in the two strings
* are equal ignoring case.
*
* <p> Two characters {@code c1} and {@code c2} are considered the same
* ignoring case if at least one of the following is true:
* <ul>
* <li> The two characters are the same (as compared by the
* {@code ==} operator)
* <li> Applying the method {@link
* java.lang.Character#toUpperCase(char)} to each character
* produces the same result
* <li> Applying the method {@link
* java.lang.Character#toLowerCase(char)} to each character
* produces the same result
* </ul>
*
* @param anotherString
* The {@code String} to compare this {@code String} against
*
* @return {@code true} if the argument is not {@code null} and it
* represents an equivalent {@code String} ignoring case; {@code
* false} otherwise
*
* @see #equals(Object)
*/
public boolean equalsIgnoreCase(String anotherString) {
return (this == anotherString) ? true
: (anotherString != null)
&& (anotherString.value.length == value.length)
&& regionMatches(true, 0, anotherString, 0, value.length);
}
/**
* Compares two strings lexicographically.
* The comparison is based on the Unicode value of each character in
* the strings. The character sequence represented by this
* <code>String</code> object is compared lexicographically to the
* character sequence represented by the argument string. The result is
* a negative integer if this <code>String</code> object
* lexicographically precedes the argument string. The result is a
* positive integer if this <code>String</code> object lexicographically
* follows the argument string. The result is zero if the strings
* are equal; <code>compareTo</code> returns <code>0</code> exactly when
* the {@link #equals(Object)} method would return <code>true</code>.
* <p>
* This is the definition of lexicographic ordering. If two strings are
* different, then either they have different characters at some index
* that is a valid index for both strings, or their lengths are different,
* or both. If they have different characters at one or more index
* positions, let <i>k</i> be the smallest such index; then the string
* whose character at position <i>k</i> has the smaller value, as
* determined by using the < operator, lexicographically precedes the
* other string. In this case, <code>compareTo</code> returns the
* difference of the two character values at position <code>k</code> in
* the two string -- that is, the value:
* <blockquote><pre>
* this.charAt(k)-anotherString.charAt(k)
* </pre></blockquote>
* If there is no index position at which they differ, then the shorter
* string lexicographically precedes the longer string. In this case,
* <code>compareTo</code> returns the difference of the lengths of the
* strings -- that is, the value:
* <blockquote><pre>
* this.length()-anotherString.length()
* </pre></blockquote>
*
* @param anotherString the <code>String</code> to be compared.
* @return the value <code>0</code> if the argument string is equal to
* this string; a value less than <code>0</code> if this string
* is lexicographically less than the string argument; and a
* value greater than <code>0</code> if this string is
* lexicographically greater than the string argument.
*/
public int compareTo(String anotherString) {
int len1 = value.length;
int len2 = anotherString.value.length;
int lim = Math.min(len1, len2);
char v1[] = value;
char v2[] = anotherString.value;
int k = 0;
while (k < lim) {
char c1 = v1[k];
char c2 = v2[k];
if (c1 != c2) {
return c1 - c2;
}
k++;
}
return len1 - len2;
}
/**
* A Comparator that orders <code>String</code> objects as by
* <code>compareToIgnoreCase</code>. This comparator is serializable.
* <p>
* Note that this Comparator does <em>not</em> take locale into account,
* and will result in an unsatisfactory ordering for certain locales.
* The java.text package provides <em>Collators</em> to allow
* locale-sensitive ordering.
*
* @see java.text.Collator#compare(String, String)
* @since 1.2
*/
public static final Comparator<String> CASE_INSENSITIVE_ORDER
= new CaseInsensitiveComparator();
private static class CaseInsensitiveComparator
implements Comparator<String>, java.io.Serializable {
// use serialVersionUID from JDK 1.2.2 for interoperability
private static final long serialVersionUID = 8575799808933029326L;
public int compare(String s1, String s2) {
int n1 = s1.length();
int n2 = s2.length();
int min = Math.min(n1, n2);
for (int i = 0; i < min; i++) {
char c1 = s1.charAt(i);
char c2 = s2.charAt(i);
if (c1 != c2) {
c1 = Character.toUpperCase(c1);
c2 = Character.toUpperCase(c2);
if (c1 != c2) {
c1 = Character.toLowerCase(c1);
c2 = Character.toLowerCase(c2);
if (c1 != c2) {
// No overflow because of numeric promotion
return c1 - c2;
}
}
}
}
return n1 - n2;
}
}
/**
* Compares two strings lexicographically, ignoring case
* differences. This method returns an integer whose sign is that of
* calling <code>compareTo</code> with normalized versions of the strings
* where case differences have been eliminated by calling
* <code>Character.toLowerCase(Character.toUpperCase(character))</code> on
* each character.
* <p>
* Note that this method does <em>not</em> take locale into account,
* and will result in an unsatisfactory ordering for certain locales.
* The java.text package provides <em>collators</em> to allow
* locale-sensitive ordering.
*
* @param str the <code>String</code> to be compared.
* @return a negative integer, zero, or a positive integer as the
* specified String is greater than, equal to, or less
* than this String, ignoring case considerations.
* @see java.text.Collator#compare(String, String)
* @since 1.2
*/
public int compareToIgnoreCase(String str) {
return CASE_INSENSITIVE_ORDER.compare(this, str);
}
/**
* Tests if two string regions are equal.
* <p>
* A substring of this <tt>String</tt> object is compared to a substring
* of the argument other. The result is true if these substrings
* represent identical character sequences. The substring of this
* <tt>String</tt> object to be compared begins at index <tt>toffset</tt>
* and has length <tt>len</tt>. The substring of other to be compared
* begins at index <tt>ooffset</tt> and has length <tt>len</tt>. The
* result is <tt>false</tt> if and only if at least one of the following
* is true:
* <ul><li><tt>toffset</tt> is negative.
* <li><tt>ooffset</tt> is negative.
* <li><tt>toffset+len</tt> is greater than the length of this
* <tt>String</tt> object.
* <li><tt>ooffset+len</tt> is greater than the length of the other
* argument.
* <li>There is some nonnegative integer <i>k</i> less than <tt>len</tt>
* such that:
* <tt>this.charAt(toffset+<i>k</i>) != other.charAt(ooffset+<i>k</i>)</tt>
* </ul>
*
* @param toffset the starting offset of the subregion in this string.
* @param other the string argument.
* @param ooffset the starting offset of the subregion in the string
* argument.
* @param len the number of characters to compare.
* @return <code>true</code> if the specified subregion of this string
* exactly matches the specified subregion of the string argument;
* <code>false</code> otherwise.
*/
public boolean regionMatches(int toffset, String other, int ooffset,
int len) {
char ta[] = value;
int to = toffset;
char pa[] = other.value;
int po = ooffset;
// Note: toffset, ooffset, or len might be near -1>>>1.
if ((ooffset < 0) || (toffset < 0)
|| (toffset > (long)value.length - len)
|| (ooffset > (long)other.value.length - len)) {
return false;
}
while (len-- > 0) {
if (ta[to++] != pa[po++]) {
return false;
}
}
return true;
}
/**
* Tests if two string regions are equal.
* <p>
* A substring of this <tt>String</tt> object is compared to a substring
* of the argument <tt>other</tt>. The result is <tt>true</tt> if these
* substrings represent character sequences that are the same, ignoring
* case if and only if <tt>ignoreCase</tt> is true. The substring of
* this <tt>String</tt> object to be compared begins at index
* <tt>toffset</tt> and has length <tt>len</tt>. The substring of
* <tt>other</tt> to be compared begins at index <tt>ooffset</tt> and
* has length <tt>len</tt>. The result is <tt>false</tt> if and only if
* at least one of the following is true:
* <ul><li><tt>toffset</tt> is negative.
* <li><tt>ooffset</tt> is negative.
* <li><tt>toffset+len</tt> is greater than the length of this
* <tt>String</tt> object.
* <li><tt>ooffset+len</tt> is greater than the length of the other
* argument.
* <li><tt>ignoreCase</tt> is <tt>false</tt> and there is some nonnegative
* integer <i>k</i> less than <tt>len</tt> such that:
* <blockquote><pre>
* this.charAt(toffset+k) != other.charAt(ooffset+k)
* </pre></blockquote>
* <li><tt>ignoreCase</tt> is <tt>true</tt> and there is some nonnegative
* integer <i>k</i> less than <tt>len</tt> such that:
* <blockquote><pre>
* Character.toLowerCase(this.charAt(toffset+k)) !=
Character.toLowerCase(other.charAt(ooffset+k))
* </pre></blockquote>
* and:
* <blockquote><pre>
* Character.toUpperCase(this.charAt(toffset+k)) !=
* Character.toUpperCase(other.charAt(ooffset+k))
* </pre></blockquote>
* </ul>
*
* @param ignoreCase if <code>true</code>, ignore case when comparing
* characters.
* @param toffset the starting offset of the subregion in this
* string.
* @param other the string argument.
* @param ooffset the starting offset of the subregion in the string
* argument.
* @param len the number of characters to compare.
* @return <code>true</code> if the specified subregion of this string
* matches the specified subregion of the string argument;
* <code>false</code> otherwise. Whether the matching is exact
* or case insensitive depends on the <code>ignoreCase</code>
* argument.
*/
public boolean regionMatches(boolean ignoreCase, int toffset,
String other, int ooffset, int len) {
char ta[] = value;
int to = toffset;
char pa[] = other.value;
int po = ooffset;
// Note: toffset, ooffset, or len might be near -1>>>1.
if ((ooffset < 0) || (toffset < 0)
|| (toffset > (long)value.length - len)
|| (ooffset > (long)other.value.length - len)) {
return false;
}
while (len-- > 0) {
char c1 = ta[to++];
char c2 = pa[po++];
if (c1 == c2) {
continue;
}
if (ignoreCase) {
// If characters don't match but case may be ignored,
// try converting both characters to uppercase.
// If the results match, then the comparison scan should
// continue.
char u1 = Character.toUpperCase(c1);
char u2 = Character.toUpperCase(c2);
if (u1 == u2) {
continue;
}
// Unfortunately, conversion to uppercase does not work properly
// for the Georgian alphabet, which has strange rules about case
// conversion. So we need to make one last check before
// exiting.
if (Character.toLowerCase(u1) == Character.toLowerCase(u2)) {
continue;
}
}
return false;
}
return true;
}