001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2020 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import java.util.ArrayList;
023import java.util.BitSet;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.regex.Pattern;
028
029import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
030import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
031import com.puppycrawl.tools.checkstyle.api.DetailAST;
032import com.puppycrawl.tools.checkstyle.api.TokenTypes;
033import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
034
035/**
036 * <p>
037 * Checks for multiple occurrences of the same string literal within a single file.
038 * </p>
039 * <p>
040 * Rationale: Code duplication makes maintenance more difficult, so it can be better
041 * to replace the multiple occurrences with a constant.
042 * </p>
043 * <ul>
044 * <li>
045 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
046 * to allow without generating a warning.
047 * Type is {@code int}.
048 * Default value is {@code 1}.
049 * </li>
050 * <li>
051 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
052 * Type is {@code java.util.regex.Pattern}.
053 * Default value is {@code "^""$"}.
054 * </li>
055 * <li>
056 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
057 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to
058 * exclude syntactical contexts like annotations or static initializers from the check.
059 * Type is {@code int[]}.
060 * Default value is {@code ANNOTATION}.
061 * </li>
062 * </ul>
063 * <p>
064 * To configure the check:
065 * </p>
066 * <pre>
067 * &lt;module name=&quot;MultipleStringLiterals&quot;/&gt;
068 * </pre>
069 * <p>
070 * To configure the check so that it allows two occurrences of each string:
071 * </p>
072 * <pre>
073 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
074 *   &lt;property name=&quot;allowedDuplicates&quot; value=&quot;2&quot;/&gt;
075 * &lt;/module&gt;
076 * </pre>
077 * <p>
078 * To configure the check so that it ignores ", " and empty strings:
079 * </p>
080 * <pre>
081 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
082 *   &lt;property name=&quot;ignoreStringsRegexp&quot;
083 *     value='^((&quot;&quot;)|(&quot;, &quot;))$'/&gt;
084 * &lt;/module&gt;
085 * </pre>
086 * <p>
087 * To configure the check so that it flags duplicate strings in all syntactical contexts,
088 * even in annotations like {@code @SuppressWarnings("unchecked")}:
089 * </p>
090 * <pre>
091 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
092 *   &lt;property name=&quot;ignoreOccurrenceContext&quot; value=&quot;&quot;/&gt;
093 * &lt;/module&gt;
094 * </pre>
095 * <p>
096 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
097 * </p>
098 * <p>
099 * Violation Message Keys:
100 * </p>
101 * <ul>
102 * <li>
103 * {@code multiple.string.literal}
104 * </li>
105 * </ul>
106 *
107 * @since 3.5
108 */
109@FileStatefulCheck
110public class MultipleStringLiteralsCheck extends AbstractCheck {
111
112    /**
113     * A key is pointing to the warning message text in "messages.properties"
114     * file.
115     */
116    public static final String MSG_KEY = "multiple.string.literal";
117
118    /**
119     * The found strings and their tokens.
120     */
121    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
122
123    /**
124     * Specify token type names where duplicate strings are ignored even if they
125     * don't match ignoredStringsRegexp. This allows you to exclude syntactical
126     * contexts like annotations or static initializers from the check.
127     */
128    private final BitSet ignoreOccurrenceContext = new BitSet();
129
130    /**
131     * Specify the maximum number of occurrences to allow without generating a warning.
132     */
133    private int allowedDuplicates = 1;
134
135    /**
136     * Specify RegExp for ignored strings (with quotation marks).
137     */
138    private Pattern ignoreStringsRegexp;
139
140    /**
141     * Construct an instance with default values.
142     */
143    public MultipleStringLiteralsCheck() {
144        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
145        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
146    }
147
148    /**
149     * Setter to specify the maximum number of occurrences to allow without generating a warning.
150     *
151     * @param allowedDuplicates The maximum number of duplicates.
152     */
153    public void setAllowedDuplicates(int allowedDuplicates) {
154        this.allowedDuplicates = allowedDuplicates;
155    }
156
157    /**
158     * Setter to specify RegExp for ignored strings (with quotation marks).
159     *
160     * @param ignoreStringsRegexp
161     *        regular expression pattern for ignored strings
162     * @noinspection WeakerAccess
163     */
164    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
165        if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
166            this.ignoreStringsRegexp = null;
167        }
168        else {
169            this.ignoreStringsRegexp = ignoreStringsRegexp;
170        }
171    }
172
173    /**
174     * Setter to specify token type names where duplicate strings are ignored even
175     * if they don't match ignoredStringsRegexp. This allows you to exclude
176     * syntactical contexts like annotations or static initializers from the check.
177     *
178     * @param strRep the string representation of the tokens interested in
179     */
180    public final void setIgnoreOccurrenceContext(String... strRep) {
181        ignoreOccurrenceContext.clear();
182        for (final String s : strRep) {
183            final int type = TokenUtil.getTokenId(s);
184            ignoreOccurrenceContext.set(type);
185        }
186    }
187
188    @Override
189    public int[] getDefaultTokens() {
190        return getRequiredTokens();
191    }
192
193    @Override
194    public int[] getAcceptableTokens() {
195        return getRequiredTokens();
196    }
197
198    @Override
199    public int[] getRequiredTokens() {
200        return new int[] {TokenTypes.STRING_LITERAL};
201    }
202
203    @Override
204    public void visitToken(DetailAST ast) {
205        if (!isInIgnoreOccurrenceContext(ast)) {
206            final String currentString = ast.getText();
207            if (ignoreStringsRegexp == null || !ignoreStringsRegexp.matcher(currentString).find()) {
208                stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
209            }
210        }
211    }
212
213    /**
214     * Analyses the path from the AST root to a given AST for occurrences
215     * of the token types in {@link #ignoreOccurrenceContext}.
216     *
217     * @param ast the node from where to start searching towards the root node
218     * @return whether the path from the root node to ast contains one of the
219     *     token type in {@link #ignoreOccurrenceContext}.
220     */
221    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
222        boolean isInIgnoreOccurrenceContext = false;
223        for (DetailAST token = ast;
224             token.getParent() != null;
225             token = token.getParent()) {
226            final int type = token.getType();
227            if (ignoreOccurrenceContext.get(type)) {
228                isInIgnoreOccurrenceContext = true;
229                break;
230            }
231        }
232        return isInIgnoreOccurrenceContext;
233    }
234
235    @Override
236    public void beginTree(DetailAST rootAST) {
237        stringMap.clear();
238    }
239
240    @Override
241    public void finishTree(DetailAST rootAST) {
242        for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
243            final List<DetailAST> hits = stringListEntry.getValue();
244            if (hits.size() > allowedDuplicates) {
245                final DetailAST firstFinding = hits.get(0);
246                log(firstFinding, MSG_KEY, stringListEntry.getKey(), hits.size());
247            }
248        }
249    }
250
251}