1 ////////////////////////////////////////////////////////////////////////////////
2 // checkstyle: Checks Java source code for adherence to a set of rules.
3 // Copyright (C) 2001-2020 the original author or authors.
4 //
5 // This library is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU Lesser General Public
7 // License as published by the Free Software Foundation; either
8 // version 2.1 of the License, or (at your option) any later version.
9 //
10 // This library is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with this library; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 ////////////////////////////////////////////////////////////////////////////////
19
20 package com.puppycrawl.tools.checkstyle.api;
21
22 import java.io.BufferedReader;
23 import java.io.File;
24 import java.io.FileNotFoundException;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.InputStreamReader;
28 import java.io.Reader;
29 import java.io.StringReader;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetDecoder;
32 import java.nio.charset.CodingErrorAction;
33 import java.nio.charset.UnsupportedCharsetException;
34 import java.nio.file.Files;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.List;
38 import java.util.regex.Matcher;
39 import java.util.regex.Pattern;
40
41 import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
42
43 /**
44 * Represents the text contents of a file of arbitrary plain text type.
45 * <p>
46 * This class will be passed to instances of class FileSetCheck by
47 * Checker.
48 * </p>
49 *
50 */
51 public final class FileText {
52
53 /**
54 * The number of characters to read in one go.
55 */
56 private static final int READ_BUFFER_SIZE = 1024;
57
58 /**
59 * Regular expression pattern matching all line terminators.
60 */
61 private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
62
63 // For now, we always keep both full text and lines array.
64 // In the long run, however, the one passed at initialization might be
65 // enough, while the other could be lazily created when requested.
66 // This would save memory but cost CPU cycles.
67
68 /**
69 * The name of the file.
70 * {@code null} if no file name is available for whatever reason.
71 */
72 private final File file;
73
74 /**
75 * The charset used to read the file.
76 * {@code null} if the file was reconstructed from a list of lines.
77 */
78 private final Charset charset;
79
80 /**
81 * The lines of the file, without terminators.
82 */
83 private final String[] lines;
84
85 /**
86 * The full text contents of the file.
87 *
88 * <p>Field is not final to ease reaching full test coverage.
89 *
90 * @noinspection FieldMayBeFinal
91 */
92 private String fullText;
93
94 /**
95 * The first position of each line within the full text.
96 */
97 private int[] lineBreaks;
98
99 /**
100 * Copy constructor.
101 *
102 * @param fileText to make copy of
103 */
104 public FileText" href="../../../../../com/puppycrawl/tools/checkstyle/api/FileText.html#FileText">FileText(FileText fileText) {
105 file = fileText.file;
106 charset = fileText.charset;
107 fullText = fileText.fullText;
108 lines = fileText.lines.clone();
109 if (fileText.lineBreaks == null) {
110 lineBreaks = null;
111 }
112 else {
113 lineBreaks = fileText.lineBreaks.clone();
114 }
115 }
116
117 /**
118 * Compatibility constructor.
119 *
120 * <p>This constructor reconstructs the text of the file by joining
121 * lines with linefeed characters. This process does not restore
122 * the original line terminators and should therefore be avoided.
123 *
124 * @param file the name of the file
125 * @param lines the lines of the text, without terminators
126 * @throws NullPointerException if the lines array is null
127 */
128 public FileText(File file, List<String> lines) {
129 final StringBuilder buf = new StringBuilder(1024);
130 for (final String line : lines) {
131 buf.append(line).append('\n');
132 }
133
134 this.file = file;
135 charset = null;
136 fullText = buf.toString();
137 this.lines = lines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
138 }
139
140 /**
141 * Creates a new file text representation.
142 *
143 * <p>The file will be read using the specified encoding, replacing
144 * malformed input and unmappable characters with the default
145 * replacement character.
146 *
147 * @param file the name of the file
148 * @param charsetName the encoding to use when reading the file
149 * @throws NullPointerException if the text is null
150 * @throws IOException if the file could not be read
151 */
152 public FileText(File file, String charsetName) throws IOException {
153 this.file = file;
154
155 // We use our own decoder, to be sure we have complete control
156 // about replacements.
157 final CharsetDecoder decoder;
158 try {
159 charset = Charset.forName(charsetName);
160 decoder = charset.newDecoder();
161 decoder.onMalformedInput(CodingErrorAction.REPLACE);
162 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
163 }
164 catch (final UnsupportedCharsetException ex) {
165 final String message = "Unsupported charset: " + charsetName;
166 throw new IllegalStateException(message, ex);
167 }
168
169 fullText = readFile(file, decoder);
170
171 // Use the BufferedReader to break down the lines as this
172 // is about 30% faster than using the
173 // LINE_TERMINATOR.split(fullText, -1) method
174 try (BufferedReader reader = new BufferedReader(new StringReader(fullText))) {
175 final ArrayList<String> textLines = new ArrayList<>();
176 while (true) {
177 final String line = reader.readLine();
178 if (line == null) {
179 break;
180 }
181 textLines.add(line);
182 }
183 lines = textLines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
184 }
185 }
186
187 /**
188 * Reads file using specific decoder and returns all its content as a String.
189 *
190 * @param inputFile File to read
191 * @param decoder Charset decoder
192 * @return File's text
193 * @throws IOException Unable to open or read the file
194 * @throws FileNotFoundException when inputFile does not exists
195 */
196 private static String readFile(final File inputFile, final CharsetDecoder decoder)
197 throws IOException {
198 if (!inputFile.exists()) {
199 throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
200 }
201 final StringBuilder buf = new StringBuilder(1024);
202 final InputStream stream = Files.newInputStream(inputFile.toPath());
203 try (Reader reader = new InputStreamReader(stream, decoder)) {
204 final char[] chars = new char[READ_BUFFER_SIZE];
205 while (true) {
206 final int len = reader.read(chars);
207 if (len == -1) {
208 break;
209 }
210 buf.append(chars, 0, len);
211 }
212 }
213 return buf.toString();
214 }
215
216 /**
217 * Retrieves a line of the text by its number.
218 * The returned line will not contain a trailing terminator.
219 *
220 * @param lineNo the number of the line to get, starting at zero
221 * @return the line with the given number
222 */
223 public String get(final int lineNo) {
224 return lines[lineNo];
225 }
226
227 /**
228 * Get the name of the file.
229 *
230 * @return an object containing the name of the file
231 */
232 public File getFile() {
233 return file;
234 }
235
236 /**
237 * Get the character set which was used to read the file.
238 * Will be {@code null} for a file reconstructed from its lines.
239 *
240 * @return the charset used when the file was read
241 */
242 public Charset getCharset() {
243 return charset;
244 }
245
246 /**
247 * Retrieve the full text of the file.
248 *
249 * @return the full text of the file
250 */
251 public CharSequence getFullText() {
252 return fullText;
253 }
254
255 /**
256 * Returns an array of all lines.
257 * {@code text.toLinesArray()} is equivalent to
258 * {@code text.toArray(new String[text.size()])}.
259 *
260 * @return an array of all lines of the text
261 */
262 public String[] toLinesArray() {
263 return lines.clone();
264 }
265
266 /**
267 * Determine line and column numbers in full text.
268 *
269 * @param pos the character position in the full text
270 * @return the line and column numbers of this character
271 */
272 public LineColumn lineColumn(int pos) {
273 final int[] lineBreakPositions = findLineBreaks();
274 int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
275 if (lineNo < 0) {
276 // we have: lineNo = -(insertion point) - 1
277 // we want: lineNo = (insertion point) - 1
278 lineNo = -lineNo - 2;
279 }
280 final int startOfLine = lineBreakPositions[lineNo];
281 final int columnNo = pos - startOfLine;
282 // now we have lineNo and columnNo, both starting at zero.
283 return new LineColumn(lineNo + 1, columnNo);
284 }
285
286 /**
287 * Find positions of line breaks in the full text.
288 *
289 * @return an array giving the first positions of each line.
290 */
291 private int[] findLineBreaks() {
292 if (lineBreaks == null) {
293 final int[] lineBreakPositions = new int[size() + 1];
294 lineBreakPositions[0] = 0;
295 int lineNo = 1;
296 final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
297 while (matcher.find()) {
298 lineBreakPositions[lineNo] = matcher.end();
299 lineNo++;
300 }
301 if (lineNo < lineBreakPositions.length) {
302 lineBreakPositions[lineNo] = fullText.length();
303 }
304 lineBreaks = lineBreakPositions;
305 }
306 return lineBreaks;
307 }
308
309 /**
310 * Counts the lines of the text.
311 *
312 * @return the number of lines in the text
313 */
314 public int size() {
315 return lines.length;
316 }
317
318 }