001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2020 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.regexp;
021
022import java.io.File;
023import java.util.regex.Pattern;
024
025import com.puppycrawl.tools.checkstyle.StatelessCheck;
026import com.puppycrawl.tools.checkstyle.api.AbstractFileSetCheck;
027import com.puppycrawl.tools.checkstyle.api.FileText;
028
029/**
030 * <p>
031 * Checks that a specified pattern matches across multiple lines in any file type.
032 * </p>
033 * <p>
034 * Rationale: This check can be used to when the regular expression can be span multiple lines.
035 * </p>
036 * <ul>
037 * <li>
038 * Property {@code format} - Specify the format of the regular expression to match.
039 * Type is {@code java.lang.String}.
040 * Default value is {@code "$."}.
041 * </li>
042 * <li>
043 * Property {@code message} - Specify the message which is used to notify about
044 * violations, if empty then default (hard-coded) message is used.
045 * Type is {@code java.lang.String}.
046 * Default value is {@code null}.
047 * </li>
048 * <li>
049 * Property {@code ignoreCase} - Control whether to ignore case when searching.
050 * Type is {@code boolean}.
051 * Default value is {@code false}.
052 * </li>
053 * <li>
054 * Property {@code minimum} - Specify the minimum number of matches required in each file.
055 * Type is {@code int}.
056 * Default value is {@code 0}.
057 * </li>
058 * <li>
059 * Property {@code maximum} - Specify the maximum number of matches required in each file.
060 * Type is {@code int}.
061 * Default value is {@code 0}.
062 * </li>
063 * <li>
064 * Property {@code matchAcrossLines} - Control whether to match expressions
065 * across multiple lines.
066 * Type is {@code boolean}.
067 * Default value is {@code false}.
068 * </li>
069 * <li>
070 * Property {@code fileExtensions} - Specify the file type extension of files to process.
071 * Type is {@code java.lang.String[]}.
072 * Default value is {@code all files}.
073 * </li>
074 * </ul>
075 * <p>
076 * To configure the check to find calls to print to the console:
077 * </p>
078 * <pre>
079 * &lt;module name="RegexpMultiline"&gt;
080 *   &lt;property name="format"
081 *     value="System\.(out)|(err)\.print(ln)?\("/&gt;
082 * &lt;/module&gt;
083 * </pre>
084 * <p>
085 * To configure the check to match text that spans multiple lines,
086 * like normal code in a Java file:
087 * </p>
088 * <pre>
089 * &lt;module name="RegexpMultiline"&gt;
090 *   &lt;property name="matchAcrossLines" value="true"/&gt;
091 *   &lt;property name="format" value="System\.out.*print\("/&gt;
092 * &lt;/module&gt;
093 * </pre>
094 * <p>
095 * Example of violation from the above config:
096 * </p>
097 * <pre>
098 * void method() {
099 *   System.out. // violation
100 *   print("Example");
101 *   System.out.
102 *   print("Example");
103 * }
104 * </pre>
105 * <p>
106 * Note: Beware of the greedy regular expression used in the above example.
107 * {@code .*} will match as much as possible and not produce multiple violations
108 * in the file if multiple groups of lines could match the expression. To prevent
109 * an expression being too greedy, avoid overusing matching all text or allow it
110 * to be optional, like {@code .*?}. Changing the example expression to not be
111 * greedy will allow multiple violations in the example to be found in the same file.
112 * </p>
113 *
114 * <p>
115 * To configure the check to restrict an empty file:
116 * </p>
117 * <pre>
118 * &lt;module name=&quot;RegexpMultiline&quot;&gt;
119 *     &lt;property name=&quot;format&quot; value=&quot;^\s*$&quot; /&gt;
120 *     &lt;property name=&quot;matchAcrossLines&quot; value=&quot;true&quot; /&gt;
121 *     &lt;property name=&quot;message&quot; value=&quot;Empty file is not allowed&quot; /&gt;
122 * &lt;/module&gt;
123 * </pre>
124 * <p>
125 * Example of violation from the above config:
126 * </p>
127 * <pre>
128 * /var/tmp$ cat -n Test.java
129 * 1
130 * 2
131 * 3
132 * 4
133 * </pre>
134 * <p>Result:</p>
135 * <pre>
136 * /var/tmp/Test.java // violation, a file must not be empty.
137 * </pre>
138 * <p>
139 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker}
140 * </p>
141 * <p>
142 * Violation Message Keys:
143 * </p>
144 * <ul>
145 * <li>
146 * {@code regexp.StackOverflowError}
147 * </li>
148 * <li>
149 * {@code regexp.empty}
150 * </li>
151 * <li>
152 * {@code regexp.exceeded}
153 * </li>
154 * <li>
155 * {@code regexp.minimum}
156 * </li>
157 * </ul>
158 *
159 * @since 5.0
160 */
161@StatelessCheck
162public class RegexpMultilineCheck extends AbstractFileSetCheck {
163
164    /** Specify the format of the regular expression to match. */
165    private String format = "$.";
166    /**
167     * Specify the message which is used to notify about violations,
168     * if empty then default (hard-coded) message is used.
169     */
170    private String message;
171    /** Specify the minimum number of matches required in each file. */
172    private int minimum;
173    /** Specify the maximum number of matches required in each file. */
174    private int maximum;
175    /** Control whether to ignore case when searching. */
176    private boolean ignoreCase;
177    /** Control whether to match expressions across multiple lines. */
178    private boolean matchAcrossLines;
179
180    /** The detector to use. */
181    private MultilineDetector detector;
182
183    @Override
184    public void beginProcessing(String charset) {
185        final DetectorOptions options = DetectorOptions.newBuilder()
186            .reporter(this)
187            .compileFlags(getRegexCompileFlags())
188            .format(format)
189            .message(message)
190            .minimum(minimum)
191            .maximum(maximum)
192            .ignoreCase(ignoreCase)
193            .build();
194        detector = new MultilineDetector(options);
195    }
196
197    @Override
198    protected void processFiltered(File file, FileText fileText) {
199        detector.processLines(fileText);
200    }
201
202    /**
203     * Retrieves the compile flags for the regular expression being built based
204     * on {@code matchAcrossLines}.
205     *
206     * @return The compile flags.
207     */
208    private int getRegexCompileFlags() {
209        final int result;
210
211        if (matchAcrossLines) {
212            result = Pattern.DOTALL;
213        }
214        else {
215            result = Pattern.MULTILINE;
216        }
217
218        return result;
219    }
220
221    /**
222     * Setter to specify the format of the regular expression to match.
223     *
224     * @param format the format of the regular expression to match.
225     */
226    public void setFormat(String format) {
227        this.format = format;
228    }
229
230    /**
231     * Setter to specify the message which is used to notify about violations,
232     * if empty then default (hard-coded) message is used.
233     *
234     * @param message the message to report for a match.
235     */
236    public void setMessage(String message) {
237        this.message = message;
238    }
239
240    /**
241     * Setter to specify the minimum number of matches required in each file.
242     *
243     * @param minimum the minimum number of matches required in each file.
244     */
245    public void setMinimum(int minimum) {
246        this.minimum = minimum;
247    }
248
249    /**
250     * Setter to specify the maximum number of matches required in each file.
251     *
252     * @param maximum the maximum number of matches required in each file.
253     */
254    public void setMaximum(int maximum) {
255        this.maximum = maximum;
256    }
257
258    /**
259     * Setter to control whether to ignore case when searching.
260     *
261     * @param ignoreCase whether to ignore case when searching.
262     */
263    public void setIgnoreCase(boolean ignoreCase) {
264        this.ignoreCase = ignoreCase;
265    }
266
267    /**
268     * Setter to control whether to match expressions across multiple lines.
269     *
270     * @param matchAcrossLines whether to match expressions across multiple lines.
271     */
272    public void setMatchAcrossLines(boolean matchAcrossLines) {
273        this.matchAcrossLines = matchAcrossLines;
274    }
275
276}