001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2020 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import java.util.ArrayList;
023import java.util.BitSet;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.regex.Pattern;
028
029import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
030import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
031import com.puppycrawl.tools.checkstyle.api.DetailAST;
032import com.puppycrawl.tools.checkstyle.api.TokenTypes;
033import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
034
035/**
036 * <p>
037 * Checks for multiple occurrences of the same string literal within a single file.
038 * </p>
039 * <p>
040 * Rationale: Code duplication makes maintenance more difficult, so it can be better
041 * to replace the multiple occurrences with a constant.
042 * </p>
043 * <ul>
044 * <li>
045 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
046 * to allow without generating a warning.
047 * Default value is {@code 1}.
048 * </li>
049 * <li>
050 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
051 * Default value is {@code "^""$"}.
052 * </li>
053 * <li>
054 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
055 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to
056 * exclude syntactical contexts like annotations or static initializers from the check.
057 * Default value is {@code ANNOTATION}.
058 * </li>
059 * </ul>
060 * <p>
061 * To configure the check:
062 * </p>
063 * <pre>
064 * &lt;module name=&quot;MultipleStringLiterals&quot;/&gt;
065 * </pre>
066 * <p>
067 * To configure the check so that it allows two occurrences of each string:
068 * </p>
069 * <pre>
070 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
071 *   &lt;property name=&quot;allowedDuplicates&quot; value=&quot;2&quot;/&gt;
072 * &lt;/module&gt;
073 * </pre>
074 * <p>
075 * To configure the check so that it ignores ", " and empty strings:
076 * </p>
077 * <pre>
078 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
079 *   &lt;property name=&quot;ignoreStringsRegexp&quot;
080 *     value='^((&quot;&quot;)|(&quot;, &quot;))$'/&gt;
081 * &lt;/module&gt;
082 * </pre>
083 * <p>
084 * To configure the check so that it flags duplicate strings in all syntactical contexts,
085 * even in annotations like {@code @SuppressWarnings("unchecked")}:
086 * </p>
087 * <pre>
088 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
089 *   &lt;property name=&quot;ignoreOccurrenceContext&quot; value=&quot;&quot;/&gt;
090 * &lt;/module&gt;
091 * </pre>
092 *
093 * @since 3.5
094 */
095@FileStatefulCheck
096public class MultipleStringLiteralsCheck extends AbstractCheck {
097
098    /**
099     * A key is pointing to the warning message text in "messages.properties"
100     * file.
101     */
102    public static final String MSG_KEY = "multiple.string.literal";
103
104    /**
105     * The found strings and their tokens.
106     */
107    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
108
109    /**
110     * Specify token type names where duplicate strings are ignored even if they
111     * don't match ignoredStringsRegexp. This allows you to exclude syntactical
112     * contexts like annotations or static initializers from the check.
113     */
114    private final BitSet ignoreOccurrenceContext = new BitSet();
115
116    /**
117     * Specify the maximum number of occurrences to allow without generating a warning.
118     */
119    private int allowedDuplicates = 1;
120
121    /**
122     * Specify RegExp for ignored strings (with quotation marks).
123     */
124    private Pattern ignoreStringsRegexp;
125
126    /**
127     * Construct an instance with default values.
128     */
129    public MultipleStringLiteralsCheck() {
130        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
131        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
132    }
133
134    /**
135     * Setter to specify the maximum number of occurrences to allow without generating a warning.
136     *
137     * @param allowedDuplicates The maximum number of duplicates.
138     */
139    public void setAllowedDuplicates(int allowedDuplicates) {
140        this.allowedDuplicates = allowedDuplicates;
141    }
142
143    /**
144     * Setter to specify RegExp for ignored strings (with quotation marks).
145     *
146     * @param ignoreStringsRegexp
147     *        regular expression pattern for ignored strings
148     * @noinspection WeakerAccess
149     */
150    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
151        if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
152            this.ignoreStringsRegexp = null;
153        }
154        else {
155            this.ignoreStringsRegexp = ignoreStringsRegexp;
156        }
157    }
158
159    /**
160     * Setter to specify token type names where duplicate strings are ignored even
161     * if they don't match ignoredStringsRegexp. This allows you to exclude
162     * syntactical contexts like annotations or static initializers from the check.
163     *
164     * @param strRep the string representation of the tokens interested in
165     */
166    public final void setIgnoreOccurrenceContext(String... strRep) {
167        ignoreOccurrenceContext.clear();
168        for (final String s : strRep) {
169            final int type = TokenUtil.getTokenId(s);
170            ignoreOccurrenceContext.set(type);
171        }
172    }
173
174    @Override
175    public int[] getDefaultTokens() {
176        return getRequiredTokens();
177    }
178
179    @Override
180    public int[] getAcceptableTokens() {
181        return getRequiredTokens();
182    }
183
184    @Override
185    public int[] getRequiredTokens() {
186        return new int[] {TokenTypes.STRING_LITERAL};
187    }
188
189    @Override
190    public void visitToken(DetailAST ast) {
191        if (!isInIgnoreOccurrenceContext(ast)) {
192            final String currentString = ast.getText();
193            if (ignoreStringsRegexp == null || !ignoreStringsRegexp.matcher(currentString).find()) {
194                stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
195            }
196        }
197    }
198
199    /**
200     * Analyses the path from the AST root to a given AST for occurrences
201     * of the token types in {@link #ignoreOccurrenceContext}.
202     *
203     * @param ast the node from where to start searching towards the root node
204     * @return whether the path from the root node to ast contains one of the
205     *     token type in {@link #ignoreOccurrenceContext}.
206     */
207    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
208        boolean isInIgnoreOccurrenceContext = false;
209        for (DetailAST token = ast;
210             token.getParent() != null;
211             token = token.getParent()) {
212            final int type = token.getType();
213            if (ignoreOccurrenceContext.get(type)) {
214                isInIgnoreOccurrenceContext = true;
215                break;
216            }
217        }
218        return isInIgnoreOccurrenceContext;
219    }
220
221    @Override
222    public void beginTree(DetailAST rootAST) {
223        stringMap.clear();
224    }
225
226    @Override
227    public void finishTree(DetailAST rootAST) {
228        for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
229            final List<DetailAST> hits = stringListEntry.getValue();
230            if (hits.size() > allowedDuplicates) {
231                final DetailAST firstFinding = hits.get(0);
232                log(firstFinding, MSG_KEY, stringListEntry.getKey(), hits.size());
233            }
234        }
235    }
236
237}