OpenJDK 13に含まれるjava.lang.Stringクラスのソースコードは3500行近いサイズの巨大なものになっている。とても全部は照会できないので,主要な部分だけ抜粋しよう。
java.lang.Stringクラスのソースコードから主な部分を抜粋してみた。文字列の値はvalueというbyte[]配列で保持されていることがわかる。corderはLATIN1とUTF16という定数値のいずれかが設定される。
ディフォルトコンストラクタでは,このvalueとcorderに空文字列のこれらの値を設定している。ASCIIコード系システムの場合は1byteで処理するようになっているようだ。多くのコンストラクタが定義されているが,いずれもこれらの値を求めて設定しているいすぎない。
isEmpty()メソッドは,value配列が長さが0かどうかで判定していることがわかる。
getBytes()メソッドでは,文字列をエンコーディングに従ってbyte[]型に変換する。StringCodingクラスのstaticメソッドencode()を呼び出している。
public final class String
implements java.io.Serializable, Comparable<String>, CharSequence,
Constable, ConstantDesc {
@Stable
private final byte[] value;
private final byte coder;
private int hash; // Default to 0
private boolean hashIsZero; // Default to false;
public String() {
this.value = “”.value;
this.coder = “”.coder;
}
public String(char value[], int offset, int count) {
this(value, offset, count, rangeCheck(value, offset, count));
}
private static Void rangeCheck(char[] value, int offset, int count) {
checkBoundsOffCount(offset, count, value.length);
return null;
}
public String(byte bytes[], int offset, int length, Charset charset) {
if (charset == null)
throw new NullPointerException(“charset”);
checkBoundsOffCount(offset, length, bytes.length);
StringCoding.Result ret =
StringCoding.decode(charset, bytes, offset, length);
this.value = ret.value;
this.coder = ret.coder;
}
public int length() {
return value.length >> coder();
}
public boolean isEmpty() {
return value.length == 0;
}
public char charAt(int index) {
if (isLatin1()) {
return StringLatin1.charAt(value, index);
} else {
return StringUTF16.charAt(value, index);
}
}
public byte[] getBytes(String charsetName)
throws UnsupportedEncodingException {
if (charsetName == null) throw new NullPointerException();
return StringCoding.encode(charsetName, coder(), value);
}
public byte[] getBytes(Charset charset) {
if (charset == null) throw new NullPointerException();
return StringCoding.encode(charset, coder(), value);
}
public boolean equals(Object anObject) {
if (this == anObject) {
return true;
}
if (anObject instanceof String) {
String aString = (String)anObject;
if (!COMPACT_STRINGS || this.coder == aString.coder) {
return StringLatin1.equals(value, aString.value);
}
}
return false;
}
public int compareTo(String anotherString) {
byte v1[] = value;
byte v2[] = anotherString.value;
byte coder = coder();
if (coder == anotherString.coder()) {
return coder == LATIN1 ? StringLatin1.compareTo(v1, v2)
: StringUTF16.compareTo(v1, v2);
}
return coder == LATIN1 ? StringLatin1.compareToUTF16(v1, v2)
: StringUTF16.compareToLatin1(v1, v2);
}
public boolean startsWith(String prefix, int toffset) {
// Note: toffset might be near -1>>>1.
if (toffset < 0 || toffset > length() – prefix.length()) {
return false;
}
byte ta[] = value;
byte pa[] = prefix.value;
int po = 0;
int pc = pa.length;
byte coder = coder();
if (coder == prefix.coder()) {
int to = (coder == LATIN1) ? toffset : toffset << 1;
while (po < pc) {
if (ta[to++] != pa[po++]) {
return false;
}
}
} else {
if (coder == LATIN1) { // && pcoder == UTF16
return false;
}
// coder == UTF16 && pcoder == LATIN1)
while (po < pc) {
if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
return false;
}
}
}
return true;
}
public boolean startsWith(String prefix) {
return startsWith(prefix, 0);
}
public int hashCode() {
// The hash or hashIsZero fields are subject to a benign data race,
// making it crucial to ensure that any observable result of the
// calculation in this method stays correct under any possible read of
// these fields. Necessary restrictions to allow this to be correct
// without explicit memory fences or similar concurrency primitives is
// that we can ever only write to one of these two fields for a given
// String instance, and that the computation is idempotent and derived
// from immutable state
int h = hash;
if (h == 0 && !hashIsZero) {
h = isLatin1() ? StringLatin1.hashCode(value)
: StringUTF16.hashCode(value);
if (h == 0) {
hashIsZero = true;
} else {
hash = h;
}
}
return h;
}
public String substring(int beginIndex, int endIndex) {
int length = length();
checkBoundsBeginEnd(beginIndex, endIndex, length);
int subLen = endIndex – beginIndex;
if (beginIndex == 0 && endIndex == length) {
return this;
}
return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
: StringUTF16.newString(value, beginIndex, subLen);
}
public String replace(char oldChar, char newChar) {
if (oldChar != newChar) {
String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar)
: StringUTF16.replace(value, oldChar, newChar);
if (ret != null) {
return ret;
}
}
return this;
}
public boolean matches(String regex) {
return Pattern.matches(regex, this);
}
public String replaceAll(String regex, String replacement) {
return Pattern.compile(regex).matcher(this).replaceAll(replacement);
}
public String[] split(String regex, int limit) {
/* fastpath if the regex is a
(1)one-char String and this character is not one of the
RegEx’s meta characters “.$|()[{^?*+\\”, or
(2)two-char String and the first char is the backslash and
the second is not the ascii digit or ascii letter.
*/
char ch = 0;
if (((regex.length() == 1 &&
“.$|()[{^?*+\\”.indexOf(ch = regex.charAt(0)) == -1) ||
(regex.length() == 2 &&
regex.charAt(0) == ‘\\’ &&
(((ch = regex.charAt(1))-‘0’)|(‘9’-ch)) < 0 &&
((ch-‘a’)|(‘z’-ch)) < 0 &&
((ch-‘A’)|(‘Z’-ch)) < 0)) &&
(ch < Character.MIN_HIGH_SURROGATE ||
ch > Character.MAX_LOW_SURROGATE))
{
int off = 0;
int next = 0;
boolean limited = limit > 0;
ArrayList<String> list = new ArrayList<>();
while ((next = indexOf(ch, off)) != -1) {
if (!limited || list.size() < limit – 1) {
list.add(substring(off, next));
off = next + 1;
} else { // last one
//assert (list.size() == limit – 1);
int last = length();
list.add(substring(off, last));
off = last;
break;
}
}
// If no match was found, return this
if (off == 0)
return new String[]{this};
// Add remaining segment
if (!limited || list.size() < limit)
list.add(substring(off, length()));
// Construct result
int resultSize = list.size();
if (limit == 0) {
while (resultSize > 0 && list.get(resultSize – 1).isEmpty()) {
resultSize–;
}
}
String[] result = new String[resultSize];
return list.subList(0, resultSize).toArray(result);
}
return Pattern.compile(regex).split(this, limit);
}
public static String valueOf(boolean b) {
return b ? “true” : “false”;
}
public static String valueOf(int i) {
return Integer.toString(i);
}
}
implements java.io.Serializable, Comparable<String>, CharSequence,
Constable, ConstantDesc {
@Stable
private final byte[] value;
private final byte coder;
private int hash; // Default to 0
private boolean hashIsZero; // Default to false;
public String() {
this.value = “”.value;
this.coder = “”.coder;
}
public String(char value[], int offset, int count) {
this(value, offset, count, rangeCheck(value, offset, count));
}
private static Void rangeCheck(char[] value, int offset, int count) {
checkBoundsOffCount(offset, count, value.length);
return null;
}
public String(byte bytes[], int offset, int length, Charset charset) {
if (charset == null)
throw new NullPointerException(“charset”);
checkBoundsOffCount(offset, length, bytes.length);
StringCoding.Result ret =
StringCoding.decode(charset, bytes, offset, length);
this.value = ret.value;
this.coder = ret.coder;
}
public int length() {
return value.length >> coder();
}
public boolean isEmpty() {
return value.length == 0;
}
public char charAt(int index) {
if (isLatin1()) {
return StringLatin1.charAt(value, index);
} else {
return StringUTF16.charAt(value, index);
}
}
public byte[] getBytes(String charsetName)
throws UnsupportedEncodingException {
if (charsetName == null) throw new NullPointerException();
return StringCoding.encode(charsetName, coder(), value);
}
public byte[] getBytes(Charset charset) {
if (charset == null) throw new NullPointerException();
return StringCoding.encode(charset, coder(), value);
}
public boolean equals(Object anObject) {
if (this == anObject) {
return true;
}
if (anObject instanceof String) {
String aString = (String)anObject;
if (!COMPACT_STRINGS || this.coder == aString.coder) {
return StringLatin1.equals(value, aString.value);
}
}
return false;
}
public int compareTo(String anotherString) {
byte v1[] = value;
byte v2[] = anotherString.value;
byte coder = coder();
if (coder == anotherString.coder()) {
return coder == LATIN1 ? StringLatin1.compareTo(v1, v2)
: StringUTF16.compareTo(v1, v2);
}
return coder == LATIN1 ? StringLatin1.compareToUTF16(v1, v2)
: StringUTF16.compareToLatin1(v1, v2);
}
public boolean startsWith(String prefix, int toffset) {
// Note: toffset might be near -1>>>1.
if (toffset < 0 || toffset > length() – prefix.length()) {
return false;
}
byte ta[] = value;
byte pa[] = prefix.value;
int po = 0;
int pc = pa.length;
byte coder = coder();
if (coder == prefix.coder()) {
int to = (coder == LATIN1) ? toffset : toffset << 1;
while (po < pc) {
if (ta[to++] != pa[po++]) {
return false;
}
}
} else {
if (coder == LATIN1) { // && pcoder == UTF16
return false;
}
// coder == UTF16 && pcoder == LATIN1)
while (po < pc) {
if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) {
return false;
}
}
}
return true;
}
public boolean startsWith(String prefix) {
return startsWith(prefix, 0);
}
public int hashCode() {
// The hash or hashIsZero fields are subject to a benign data race,
// making it crucial to ensure that any observable result of the
// calculation in this method stays correct under any possible read of
// these fields. Necessary restrictions to allow this to be correct
// without explicit memory fences or similar concurrency primitives is
// that we can ever only write to one of these two fields for a given
// String instance, and that the computation is idempotent and derived
// from immutable state
int h = hash;
if (h == 0 && !hashIsZero) {
h = isLatin1() ? StringLatin1.hashCode(value)
: StringUTF16.hashCode(value);
if (h == 0) {
hashIsZero = true;
} else {
hash = h;
}
}
return h;
}
public String substring(int beginIndex, int endIndex) {
int length = length();
checkBoundsBeginEnd(beginIndex, endIndex, length);
int subLen = endIndex – beginIndex;
if (beginIndex == 0 && endIndex == length) {
return this;
}
return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen)
: StringUTF16.newString(value, beginIndex, subLen);
}
public String replace(char oldChar, char newChar) {
if (oldChar != newChar) {
String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar)
: StringUTF16.replace(value, oldChar, newChar);
if (ret != null) {
return ret;
}
}
return this;
}
public boolean matches(String regex) {
return Pattern.matches(regex, this);
}
public String replaceAll(String regex, String replacement) {
return Pattern.compile(regex).matcher(this).replaceAll(replacement);
}
public String[] split(String regex, int limit) {
/* fastpath if the regex is a
(1)one-char String and this character is not one of the
RegEx’s meta characters “.$|()[{^?*+\\”, or
(2)two-char String and the first char is the backslash and
the second is not the ascii digit or ascii letter.
*/
char ch = 0;
if (((regex.length() == 1 &&
“.$|()[{^?*+\\”.indexOf(ch = regex.charAt(0)) == -1) ||
(regex.length() == 2 &&
regex.charAt(0) == ‘\\’ &&
(((ch = regex.charAt(1))-‘0’)|(‘9’-ch)) < 0 &&
((ch-‘a’)|(‘z’-ch)) < 0 &&
((ch-‘A’)|(‘Z’-ch)) < 0)) &&
(ch < Character.MIN_HIGH_SURROGATE ||
ch > Character.MAX_LOW_SURROGATE))
{
int off = 0;
int next = 0;
boolean limited = limit > 0;
ArrayList<String> list = new ArrayList<>();
while ((next = indexOf(ch, off)) != -1) {
if (!limited || list.size() < limit – 1) {
list.add(substring(off, next));
off = next + 1;
} else { // last one
//assert (list.size() == limit – 1);
int last = length();
list.add(substring(off, last));
off = last;
break;
}
}
// If no match was found, return this
if (off == 0)
return new String[]{this};
// Add remaining segment
if (!limited || list.size() < limit)
list.add(substring(off, length()));
// Construct result
int resultSize = list.size();
if (limit == 0) {
while (resultSize > 0 && list.get(resultSize – 1).isEmpty()) {
resultSize–;
}
}
String[] result = new String[resultSize];
return list.subList(0, resultSize).toArray(result);
}
return Pattern.compile(regex).split(this, limit);
}
public static String valueOf(boolean b) {
return b ? “true” : “false”;
}
public static String valueOf(int i) {
return Integer.toString(i);
}
}