View Javadoc
1   ////////////////////////////////////////////////////////////////////////////////
2   // checkstyle: Checks Java source code for adherence to a set of rules.
3   // Copyright (C) 2001-2020 the original author or authors.
4   //
5   // This library is free software; you can redistribute it and/or
6   // modify it under the terms of the GNU Lesser General Public
7   // License as published by the Free Software Foundation; either
8   // version 2.1 of the License, or (at your option) any later version.
9   //
10  // This library is distributed in the hope that it will be useful,
11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  // Lesser General Public License for more details.
14  //
15  // You should have received a copy of the GNU Lesser General Public
16  // License along with this library; if not, write to the Free Software
17  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  ////////////////////////////////////////////////////////////////////////////////
19  
20  package com.puppycrawl.tools.checkstyle.api;
21  
22  import java.io.BufferedReader;
23  import java.io.File;
24  import java.io.FileNotFoundException;
25  import java.io.IOException;
26  import java.io.InputStream;
27  import java.io.InputStreamReader;
28  import java.io.Reader;
29  import java.io.StringReader;
30  import java.nio.charset.Charset;
31  import java.nio.charset.CharsetDecoder;
32  import java.nio.charset.CodingErrorAction;
33  import java.nio.charset.UnsupportedCharsetException;
34  import java.nio.file.Files;
35  import java.util.ArrayList;
36  import java.util.Arrays;
37  import java.util.List;
38  import java.util.regex.Matcher;
39  import java.util.regex.Pattern;
40  
41  import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
42  
43  /**
44   * Represents the text contents of a file of arbitrary plain text type.
45   * <p>
46   * This class will be passed to instances of class FileSetCheck by
47   * Checker.
48   * </p>
49   *
50   */
51  public final class FileText {
52  
53      /**
54       * The number of characters to read in one go.
55       */
56      private static final int READ_BUFFER_SIZE = 1024;
57  
58      /**
59       * Regular expression pattern matching all line terminators.
60       */
61      private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
62  
63      // For now, we always keep both full text and lines array.
64      // In the long run, however, the one passed at initialization might be
65      // enough, while the other could be lazily created when requested.
66      // This would save memory but cost CPU cycles.
67  
68      /**
69       * The name of the file.
70       * {@code null} if no file name is available for whatever reason.
71       */
72      private final File file;
73  
74      /**
75       * The charset used to read the file.
76       * {@code null} if the file was reconstructed from a list of lines.
77       */
78      private final Charset charset;
79  
80      /**
81       * The lines of the file, without terminators.
82       */
83      private final String[] lines;
84  
85      /**
86       * The full text contents of the file.
87       *
88       * <p>Field is not final to ease reaching full test coverage.
89       *
90       * @noinspection FieldMayBeFinal
91       */
92      private String fullText;
93  
94      /**
95       * The first position of each line within the full text.
96       */
97      private int[] lineBreaks;
98  
99      /**
100      * Copy constructor.
101      *
102      * @param fileText to make copy of
103      */
104     public FileText" href="../../../../../com/puppycrawl/tools/checkstyle/api/FileText.html#FileText">FileText(FileText fileText) {
105         file = fileText.file;
106         charset = fileText.charset;
107         fullText = fileText.fullText;
108         lines = fileText.lines.clone();
109         if (fileText.lineBreaks == null) {
110             lineBreaks = null;
111         }
112         else {
113             lineBreaks = fileText.lineBreaks.clone();
114         }
115     }
116 
117     /**
118      * Compatibility constructor.
119      *
120      * <p>This constructor reconstructs the text of the file by joining
121      * lines with linefeed characters. This process does not restore
122      * the original line terminators and should therefore be avoided.
123      *
124      * @param file the name of the file
125      * @param lines the lines of the text, without terminators
126      * @throws NullPointerException if the lines array is null
127      */
128     public FileText(File file, List<String> lines) {
129         final StringBuilder buf = new StringBuilder(1024);
130         for (final String line : lines) {
131             buf.append(line).append('\n');
132         }
133 
134         this.file = file;
135         charset = null;
136         fullText = buf.toString();
137         this.lines = lines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
138     }
139 
140     /**
141      * Creates a new file text representation.
142      *
143      * <p>The file will be read using the specified encoding, replacing
144      * malformed input and unmappable characters with the default
145      * replacement character.
146      *
147      * @param file the name of the file
148      * @param charsetName the encoding to use when reading the file
149      * @throws NullPointerException if the text is null
150      * @throws IOException if the file could not be read
151      */
152     public FileText(File file, String charsetName) throws IOException {
153         this.file = file;
154 
155         // We use our own decoder, to be sure we have complete control
156         // about replacements.
157         final CharsetDecoder decoder;
158         try {
159             charset = Charset.forName(charsetName);
160             decoder = charset.newDecoder();
161             decoder.onMalformedInput(CodingErrorAction.REPLACE);
162             decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
163         }
164         catch (final UnsupportedCharsetException ex) {
165             final String message = "Unsupported charset: " + charsetName;
166             throw new IllegalStateException(message, ex);
167         }
168 
169         fullText = readFile(file, decoder);
170 
171         // Use the BufferedReader to break down the lines as this
172         // is about 30% faster than using the
173         // LINE_TERMINATOR.split(fullText, -1) method
174         try (BufferedReader reader = new BufferedReader(new StringReader(fullText))) {
175             final ArrayList<String> textLines = new ArrayList<>();
176             while (true) {
177                 final String line = reader.readLine();
178                 if (line == null) {
179                     break;
180                 }
181                 textLines.add(line);
182             }
183             lines = textLines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
184         }
185     }
186 
187     /**
188      * Reads file using specific decoder and returns all its content as a String.
189      *
190      * @param inputFile File to read
191      * @param decoder Charset decoder
192      * @return File's text
193      * @throws IOException Unable to open or read the file
194      * @throws FileNotFoundException when inputFile does not exists
195      */
196     private static String readFile(final File inputFile, final CharsetDecoder decoder)
197             throws IOException {
198         if (!inputFile.exists()) {
199             throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
200         }
201         final StringBuilder buf = new StringBuilder(1024);
202         final InputStream stream = Files.newInputStream(inputFile.toPath());
203         try (Reader reader = new InputStreamReader(stream, decoder)) {
204             final char[] chars = new char[READ_BUFFER_SIZE];
205             while (true) {
206                 final int len = reader.read(chars);
207                 if (len == -1) {
208                     break;
209                 }
210                 buf.append(chars, 0, len);
211             }
212         }
213         return buf.toString();
214     }
215 
216     /**
217      * Retrieves a line of the text by its number.
218      * The returned line will not contain a trailing terminator.
219      *
220      * @param lineNo the number of the line to get, starting at zero
221      * @return the line with the given number
222      */
223     public String get(final int lineNo) {
224         return lines[lineNo];
225     }
226 
227     /**
228      * Get the name of the file.
229      *
230      * @return an object containing the name of the file
231      */
232     public File getFile() {
233         return file;
234     }
235 
236     /**
237      * Get the character set which was used to read the file.
238      * Will be {@code null} for a file reconstructed from its lines.
239      *
240      * @return the charset used when the file was read
241      */
242     public Charset getCharset() {
243         return charset;
244     }
245 
246     /**
247      * Retrieve the full text of the file.
248      *
249      * @return the full text of the file
250      */
251     public CharSequence getFullText() {
252         return fullText;
253     }
254 
255     /**
256      * Returns an array of all lines.
257      * {@code text.toLinesArray()} is equivalent to
258      * {@code text.toArray(new String[text.size()])}.
259      *
260      * @return an array of all lines of the text
261      */
262     public String[] toLinesArray() {
263         return lines.clone();
264     }
265 
266     /**
267      * Determine line and column numbers in full text.
268      *
269      * @param pos the character position in the full text
270      * @return the line and column numbers of this character
271      */
272     public LineColumn lineColumn(int pos) {
273         final int[] lineBreakPositions = findLineBreaks();
274         int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
275         if (lineNo < 0) {
276             // we have: lineNo = -(insertion point) - 1
277             // we want: lineNo =  (insertion point) - 1
278             lineNo = -lineNo - 2;
279         }
280         final int startOfLine = lineBreakPositions[lineNo];
281         final int columnNo = pos - startOfLine;
282         // now we have lineNo and columnNo, both starting at zero.
283         return new LineColumn(lineNo + 1, columnNo);
284     }
285 
286     /**
287      * Find positions of line breaks in the full text.
288      *
289      * @return an array giving the first positions of each line.
290      */
291     private int[] findLineBreaks() {
292         if (lineBreaks == null) {
293             final int[] lineBreakPositions = new int[size() + 1];
294             lineBreakPositions[0] = 0;
295             int lineNo = 1;
296             final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
297             while (matcher.find()) {
298                 lineBreakPositions[lineNo] = matcher.end();
299                 lineNo++;
300             }
301             if (lineNo < lineBreakPositions.length) {
302                 lineBreakPositions[lineNo] = fullText.length();
303             }
304             lineBreaks = lineBreakPositions;
305         }
306         return lineBreaks;
307     }
308 
309     /**
310      * Counts the lines of the text.
311      *
312      * @return the number of lines in the text
313      */
314     public int size() {
315         return lines.length;
316     }
317 
318 }