TextFile.java 7.4 KB
Newer Older
K
kohsuke 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
/*
 * The MIT License
 * 
 * Copyright (c) 2004-2009, Sun Microsystems, Inc., Kohsuke Kawaguchi
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
K
kohsuke 已提交
24 25
package hudson.util;

K
Kohsuke Kawaguchi 已提交
26 27
import com.google.common.collect.*;

import javax.annotation.Nonnull;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.io.Reader;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Iterator;
K
kohsuke 已提交
42 43 44 45 46 47 48 49 50

/**
 * Represents a text file.
 *
 * Provides convenience methods for reading and writing to it.
 *
 * @author Kohsuke Kawaguchi
 */
public class TextFile {
51
    public final File file;
K
kohsuke 已提交
52 53 54 55 56 57 58 59 60

    public TextFile(File file) {
        this.file = file;
    }

    public boolean exists() {
        return file.exists();
    }

K
kohsuke 已提交
61 62 63 64
    public void delete() {
        file.delete();
    }

K
kohsuke 已提交
65 66 67 68 69 70
    /**
     * Reads the entire contents and returns it.
     */
    public String read() throws IOException {
        StringWriter out = new StringWriter();
        PrintWriter w = new PrintWriter(out);
71
        try (BufferedReader in = new BufferedReader(new InputStreamReader(Files.newInputStream(file.toPath()), "UTF-8"))) {
K
kohsuke 已提交
72
            String line;
N
Nicolas De Loof 已提交
73
            while ((line = in.readLine()) != null)
K
kohsuke 已提交
74 75 76 77 78
                w.println(line);
        }
        return out.toString();
    }

K
Kohsuke Kawaguchi 已提交
79 80 81 82 83 84 85 86
    /**
     * Parse text file line by line.
     */
    public Iterable<String> lines() {
        return new Iterable<String>() {
            @Override
            public Iterator<String> iterator() {
                try {
87 88
                    final BufferedReader in = new BufferedReader(new InputStreamReader(
                            Files.newInputStream(file.toPath()),"UTF-8"));
K
Kohsuke Kawaguchi 已提交
89 90 91 92 93 94

                    return new AbstractIterator<String>() {
                        @Override
                        protected String computeNext() {
                            try {
                                String r = in.readLine();
95 96 97 98
                                if (r==null) {
                                    in.close();
                                    return endOfData();
                                }
K
Kohsuke Kawaguchi 已提交
99 100 101 102 103 104 105 106 107 108 109 110 111
                                return r;
                            } catch (IOException e) {
                                throw new RuntimeException(e);
                            }
                        }
                    };
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        };
    }

K
kohsuke 已提交
112 113 114 115
    /**
     * Overwrites the file by the given string.
     */
    public void write(String text) throws IOException {
116
        file.getParentFile().mkdirs();
K
kohsuke 已提交
117
        AtomicFileWriter w = new AtomicFileWriter(file);
118 119 120
        try {
            w.write(text);
            w.commit();
121
        } finally {
122 123
            w.abort();
        }
K
kohsuke 已提交
124 125
    }

K
Kohsuke Kawaguchi 已提交
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
    /**
     * Reads the first N characters or until we hit EOF.
     */
    public @Nonnull String head(int numChars) throws IOException {
        char[] buf = new char[numChars];
        int read = 0;
        Reader r = new FileReader(file);

        try {
            while (read<numChars) {
                int d = r.read(buf,read,buf.length-read);
                if (d<0)
                    break;
                read += d;
            }

            return new String(buf,0,read);
        } finally {
144
            org.apache.commons.io.IOUtils.closeQuietly(r);
K
Kohsuke Kawaguchi 已提交
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
        }
    }

    /**
     * Efficiently reads the last N characters (or shorter, if the whole file is shorter than that.)
     *
     * <p>
     * This method first tries to just read the tail section of the file to get the necessary chars.
     * To handle multi-byte variable length encoding (such as UTF-8), we read a larger than
     * necessary chunk.
     *
     * <p>
     * Some multi-byte encoding, such as Shift-JIS (http://en.wikipedia.org/wiki/Shift_JIS) doesn't
     * allow the first byte and the second byte of a single char to be unambiguously identified,
     * so it is possible that we end up decoding incorrectly if we start reading in the middle of a multi-byte
     * character. All the CJK multi-byte encodings that I know of are self-correcting; as they are ASCII-compatible,
     * any ASCII characters or control characters will bring the decoding back in sync, so the worst
     * case we just have some garbage in the beginning that needs to be discarded. To accommodate this,
     * we read additional 1024 bytes.
     *
     * <p>
     * Other encodings, such as UTF-8, are better in that the character boundary is unambiguous,
     * so there can be at most one garbage char. For dealing with UTF-16 and UTF-32, we read at
     * 4 bytes boundary (all the constants and multipliers are multiples of 4.)
     *
     * <p>
     * Note that it is possible to construct a contrived input that fools this algorithm, and in this method
     * we are willing to live with a small possibility of that to avoid reading the whole text. In practice,
     * such an input is very unlikely.
     *
     * <p>
     * So all in all, this algorithm should work decently, and it works quite efficiently on a large text.
     */
    public @Nonnull String fastTail(int numChars, Charset cs) throws IOException {

N
Nicolas De Loof 已提交
180
        try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
K
Kohsuke Kawaguchi 已提交
181 182 183
            long len = raf.length();
            // err on the safe side and assume each char occupies 4 bytes
            // additional 1024 byte margin is to bring us back in sync in case we started reading from non-char boundary.
N
Nicolas De Loof 已提交
184
            long pos = Math.max(0, len - (numChars * 4 + 1024));
K
Kohsuke Kawaguchi 已提交
185
            raf.seek(pos);
K
Kohsuke Kawaguchi 已提交
186

N
Nicolas De Loof 已提交
187
            byte[] tail = new byte[(int) (len - pos)];
K
Kohsuke Kawaguchi 已提交
188
            raf.readFully(tail);
K
Kohsuke Kawaguchi 已提交
189

K
Kohsuke Kawaguchi 已提交
190
            String tails = cs.decode(java.nio.ByteBuffer.wrap(tail)).toString();
K
Kohsuke Kawaguchi 已提交
191

N
Nicolas De Loof 已提交
192
            return new String(tails.substring(Math.max(0, tails.length() - numChars))); // trim the baggage of substring by allocating a new String
K
Kohsuke Kawaguchi 已提交
193
        }
K
Kohsuke Kawaguchi 已提交
194 195 196 197 198 199 200 201 202 203
    }

    /**
     * Uses the platform default encoding.
     */
    public @Nonnull String fastTail(int numChars) throws IOException {
        return fastTail(numChars,Charset.defaultCharset());
    }


K
kohsuke 已提交
204 205 206 207
    public String readTrim() throws IOException {
        return read().trim();
    }

208
    @Override
K
kohsuke 已提交
209 210 211 212
    public String toString() {
        return file.toString();
    }
}