提交 38af8cf8 编写于 作者: K Kohsuke Kawaguchi

added TreeString code.

This code was originally developed for the analysis-core plugin, but I think it's useful outside of it.
I'm promoting this up to core.
上级 2338b668
/*
* The MIT License
*
* Copyright (c) 2012, CloudBees, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package jenkins.util;
import java.io.Serializable;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import com.thoughtworks.xstream.XStream;
import com.thoughtworks.xstream.converters.Converter;
import com.thoughtworks.xstream.converters.MarshallingContext;
import com.thoughtworks.xstream.converters.UnmarshallingContext;
import com.thoughtworks.xstream.io.HierarchicalStreamReader;
import com.thoughtworks.xstream.io.HierarchicalStreamWriter;
/**
 * {@link TreeString} is an alternative string representation that saves
 * memory when you have a large number of strings that share common prefixes
 * (such as file names).
 * <p>
 * Each instance stores only its own {@link #label}; the full string value is
 * the concatenation of the labels found by walking the {@link #parent} chain
 * from the root down to this node.
 * <p>
 * {@link TreeString} can be built with {@link TreeStringBuilder}.
 *
 * @author Kohsuke Kawaguchi
 * @since 1.473
 */
// CHECKSTYLE:OFF
@SuppressWarnings("PMD")
public final class TreeString implements Serializable {
    private static final long serialVersionUID = 3621959682117480904L;

    /**
     * Parent node that represents the prefix. {@code null} for a node that
     * has no prefix.
     */
    private TreeString parent;

    /**
     * {@link #parent}+{@link #label} is the string value of this node.
     */
    private char[] label;

    /**
     * Creates a new root {@link TreeString}, i.e. one with no parent and an
     * empty label.
     */
    /* package */TreeString() {
        this(null, "");
    }

    /**
     * Creates a node whose value is the value of {@code parent} followed by
     * {@code label}.
     *
     * @param parent
     *            node representing the prefix, or {@code null} for none
     * @param label
     *            text contributed by this node; must not be empty when a
     *            parent is given (checked by an assertion only)
     */
    /* package */TreeString(final TreeString parent, final String label) {
        // if there's a parent, label can't be empty.
        assert parent == null || label.length() > 0;

        this.parent = parent;
        // string created as a substring of another string can have a lot of
        // garbage attached to it, so keep a private copy of the characters.
        this.label = label.toCharArray();
    }

    /**
     * Returns this node's own label (without the parent prefix) as a new
     * {@link String}.
     */
    /* package */String getLabel() {
        return new String(label);
    }

    /**
     * Inserts a new node between this node and its parent, and returns the
     * newly inserted node.
     * <p>
     * The new node takes {@code prefix} as its label and this node keeps the
     * remainder, so this operation doesn't change the string representation
     * of this node.
     *
     * @param prefix
     *            leading part of the current label (checked by an assertion
     *            only)
     * @return the newly inserted middle node
     */
    /* package */TreeString split(final String prefix) {
        assert getLabel().startsWith(prefix);

        char[] suffix = new char[label.length - prefix.length()];
        System.arraycopy(label, prefix.length(), suffix, 0, suffix.length);

        TreeString middle = new TreeString(parent, prefix);
        label = suffix;
        parent = middle;
        return middle;
    }

    /**
     * How many nodes do we have from the root to this node (including 'this'
     * itself)? Thus the depth of the root node is 1.
     */
    private int depth() {
        int i = 0;
        for (TreeString p = this; p != null; p = p.parent) {
            i++;
        }
        return i;
    }

    /**
     * Two {@link TreeString}s are equal when their full string values are
     * equal, regardless of how the value is divided into nodes.
     * <p>
     * The comparison deliberately uses the full value rather than just
     * {@link #label}: {@link #hashCode()} is computed over the full value, so
     * comparing labels alone would let two nodes with different prefixes but
     * the same last segment be "equal" with different hash codes.
     */
    @Override
    public boolean equals(final Object rhs) {
        if (this == rhs) {
            return true;
        }
        if (rhs == null || rhs.getClass() != TreeString.class) {
            return false;
        }
        return toString().equals(rhs.toString());
    }

    /**
     * Computes the same value as {@code toString().hashCode()} without
     * building the full string: the parent's hash is used as the starting
     * value and this node's label characters are folded in.
     */
    @Override
    public int hashCode() {
        int h = parent == null ? 0 : parent.hashCode();

        for (int i = 0; i < label.length; i++) {
            h = 31 * h + label[i];
        }

        assert toString().hashCode() == h;
        return h;
    }

    /**
     * Returns the full string representation.
     */
    @Override
    public String toString() {
        // Collect labels from this node up to the root, filling the array
        // from the back so that it ends up in root-to-leaf order.
        char[][] tokens = new char[depth()][];
        int i = tokens.length;
        int sz = 0;
        for (TreeString p = this; p != null; p = p.parent) {
            tokens[--i] = p.label;
            sz += p.label.length;
        }

        StringBuilder buf = new StringBuilder(sz);
        for (char[] token : tokens) {
            buf.append(token);
        }

        return buf.toString();
    }

    /**
     * Interns {@link #label}: if {@code table} already holds a character
     * array for the same label text, this node switches to that shared array;
     * otherwise this node's array is registered in the table.
     *
     * @param table
     *            label text to the character array registered for it
     */
    /* package */void dedup(final Map<String, char[]> table) {
        String l = getLabel();
        char[] v = table.get(l);
        if (v != null) {
            label = v;
        }
        else {
            table.put(l, label);
        }
    }

    /**
     * Returns the result of {@link StringUtils#isBlank(String)} applied to
     * the full string value.
     */
    public boolean isBlank() {
        return StringUtils.isBlank(toString());
    }

    /**
     * Null-safe version of {@link #toString()}.
     *
     * @return the full string value, or null if the parameter is null
     */
    public static String toString(final TreeString t) {
        return t == null ? null : t.toString();
    }

    /**
     * Creates a {@link TreeString}. Useful if you need to create one-off
     * {@link TreeString} without {@link TreeStringBuilder}. Memory consumption
     * is still about the same to {@code new String(s)}.
     *
     * @return null if the parameter is null
     */
    public static TreeString of(final String s) {
        if (s == null) {
            return null;
        }
        return new TreeString(null, s);
    }

    /**
     * Default {@link Converter} implementation for XStream that does interning
     * scoped to one unmarshalling.
     */
    @SuppressWarnings("all")
    public static final class ConverterImpl implements Converter {
        public ConverterImpl(final XStream xs) {}

        /**
         * Writes the full string value of {@code source} as the element value.
         */
        public void marshal(final Object source, final HierarchicalStreamWriter writer,
                final MarshallingContext context) {
            writer.setValue(source == null ? null : source.toString());
        }

        /**
         * Interns the element value through a {@link TreeStringBuilder} kept
         * in the unmarshalling context, creating that builder on first use.
         */
        public Object unmarshal(final HierarchicalStreamReader reader, final UnmarshallingContext context) {
            TreeStringBuilder builder = (TreeStringBuilder)context.get(TreeStringBuilder.class);
            if (builder == null) {
                context.put(TreeStringBuilder.class, builder = new TreeStringBuilder());

                // dedup at the end
                final TreeStringBuilder _builder = builder;
                context.addCompletionCallback(new Runnable() {
                    public void run() {
                        _builder.dedup();
                    }
                }, 0);
            }
            return builder.intern(reader.getValue());
        }

        public boolean canConvert(final Class type) {
            return type == TreeString.class;
        }
    }
}
package jenkins.util;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
 * Builds {@link TreeString}s that share common prefixes. Call
 * {@link #intern(String)} and you get the {@link TreeString} that represents
 * the same string, but as you intern more strings that share the same
 * prefixes, the {@link TreeString}s that you get back start to share data.
 * <p>
 * Because the internal state of {@link TreeString}s gets mutated as new
 * strings are interned (to exploit newly found common prefixes),
 * {@link TreeString}s returned from {@link #intern(String)} aren't
 * thread-safe until the {@link TreeStringBuilder} is disposed. That is, you
 * have to make sure other threads don't see those {@link TreeString}s until
 * you are done interning strings.
 *
 * @author Kohsuke Kawaguchi
 * @since 1.473
 */
@SuppressWarnings({"PMD", "all"})
//CHECKSTYLE:OFF
public class TreeStringBuilder {
    /**
     * Place holder that represents no child node, until one is added.
     */
    private static final Map<String, Child> NO_CHILDREN = Collections.emptyMap();

    Child root = new Child(new TreeString());

    /**
     * One node of the builder's tree: a {@link TreeString} plus the child
     * nodes hanging below it, keyed by the text of the edge leading to them.
     */
    private static class Child {
        private final TreeString node;

        private Map<String, Child> children = NO_CHILDREN;

        private Child(final TreeString node) {
            this.node = node;
        }

        /**
         * Adds one edge and leaf to this tree node, or returns an existing
         * node if any.
         *
         * @param s
         *            text to look up below this node; an empty string
         *            resolves to this node itself
         */
        public Child intern(final String s) {
            if (s.length() == 0) {
                return this;
            }

            makeWritable();

            for (Map.Entry<String, Child> entry : children.entrySet()) {
                final String key = entry.getKey();
                final int shared = commonPrefix(key, s);
                if (shared == 0) {
                    continue;
                }

                final String rest = s.substring(shared);
                if (shared == key.length()) {
                    // the whole edge matches; keep descending with what is left
                    return entry.getValue().intern(rest);
                }

                // only part of the edge matches: insert a node between this
                // and the existing child, and register it in the child's place
                final String prefix = s.substring(0, shared);
                final Child middle = entry.getValue().split(prefix);
                children.remove(key);
                children.put(prefix, middle);
                // the map is not iterated again after being modified
                return middle.intern(rest);
            }

            // no common prefix with any existing edge: an entirely new node
            Child leaf = children.get(s);
            if (leaf == null) {
                leaf = new Child(new TreeString(node, s));
                children.put(s, leaf);
            }
            return leaf;
        }

        /**
         * Makes sure {@link #children} is writable by replacing the shared
         * empty place holder with a fresh map.
         */
        private void makeWritable() {
            if (children == NO_CHILDREN) {
                children = new HashMap<String, Child>();
            }
        }

        /**
         * Inserts a new node between this node and its parent, and returns
         * that node. The newly inserted 'middle' node will have this node as
         * its sole child.
         */
        private Child split(final String prefix) {
            // read the label before TreeString#split shortens it
            final String remainder = node.getLabel().substring(prefix.length());

            final Child middle = new Child(node.split(prefix));
            middle.makeWritable();
            middle.children.put(remainder, this);
            return middle;
        }

        /**
         * Returns the length of the common prefix between two strings.
         */
        private int commonPrefix(final String a, final String b) {
            final int limit = Math.min(a.length(), b.length());
            int matched = 0;
            while (matched < limit && a.charAt(matched) == b.charAt(matched)) {
                matched++;
            }
            return matched;
        }

        /**
         * Calls {@link TreeString#dedup(Map)} on this node and then,
         * recursively, on every child.
         */
        private void dedup(final Map<String, char[]> table) {
            node.dedup(table);
            for (Child descendant : children.values()) {
                descendant.dedup(table);
            }
        }
    }

    /**
     * Interns a string.
     *
     * @return null if the parameter is null
     */
    public TreeString intern(final String s) {
        if (s == null) {
            return null;
        }
        final Child found = root.intern(s);
        return found.node;
    }

    /**
     * Interns a {@link TreeString} created elsewhere.
     *
     * @return null if the parameter is null
     */
    public TreeString intern(final TreeString s) {
        if (s == null) {
            return null;
        }
        final Child found = root.intern(s.toString());
        return found.node;
    }

    /**
     * Further reduces the memory footprint by finding the same labels across
     * multiple {@link TreeString}s.
     */
    public void dedup() {
        final Map<String, char[]> table = new HashMap<String, char[]>();
        root.dedup(table);
    }
}
package jenkins.util;
import static org.junit.Assert.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.junit.Test;
/**
 * Tests the class {@link TreeStringBuilder}.
 *
 * @author Kohsuke Kawaguchi
 */
@SuppressWarnings({"PMD", "all"})
//CHECKSTYLE:OFF
public class TreeStringBuilderTest {
    /**
     * Tests the simple operations inside the builder.
     */
    @Test
    public void test() {
        TreeStringBuilder b = new TreeStringBuilder();
        verify("foo", b.intern("foo"));
        TreeString s = b.intern("foo/bar/zot");
        verify("foo/bar/zot", s);
        verify("", b.intern(""));
        // this will create a new middle node
        verify("foo/bar/xxx", b.intern("foo/bar/xxx"));
        // make sure existing strings aren't affected
        verify("foo/bar/zot", s);
    }

    /**
     * Pseudo random (but deterministic) test: interns 1000 strings assembled
     * from a tiny dictionary so that many of them share prefixes.
     */
    @Test
    public void testRandom() {
        String[] dict = new String[]{"aa", "b", "aba", "ba"};
        TreeStringBuilder x = new TreeStringBuilder();
        Random r = new Random(0);

        List<String> a = new ArrayList<String>();
        List<TreeString> o = new ArrayList<TreeString>();

        for (int i = 0; i < 1000; i++) {
            // Draw the word count once per string; evaluating it in the loop
            // condition would draw a new bound on every iteration.
            int words = r.nextInt(10) + 3;
            StringBuilder b = new StringBuilder();
            for (int j = 0; j < words; j++) {
                b.append(dict[r.nextInt(dict.length)]);
            }
            String s = b.toString();

            a.add(s);

            TreeString p = x.intern(s);
            verify(s, p);
            o.add(p);
        }

        // make sure values are still all intact after later interning
        // has split existing nodes
        for (int i = 0; i < a.size(); i++) {
            verify(a.get(i), o.get(i));
        }

        x.dedup();

        // verify one more time, now that labels have been deduplicated
        for (int i = 0; i < a.size(); i++) {
            verify(a.get(i), o.get(i));
        }
    }

    /**
     * Asserts that {@code t} has the string value {@code s} and the same hash
     * code as {@code s}.
     */
    private void verify(final String s, final TreeString t) {
        assertEquals(s, t.toString());
        assertEquals(s.hashCode(), t.hashCode());
    }
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册