001//////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code for adherence to a set of rules. 003// Copyright (C) 2001-2020 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018//////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.regexp; 021 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 026import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 027import com.puppycrawl.tools.checkstyle.api.DetailAST; 028import com.puppycrawl.tools.checkstyle.api.FileContents; 029import com.puppycrawl.tools.checkstyle.api.FileText; 030import com.puppycrawl.tools.checkstyle.api.LineColumn; 031import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 032 033/** 034 * <p> 035 * Checks that a specified pattern exists, exists less than 036 * a set number of times, or does not exist in the file. 
037 * </p> 038 * <p> 039 * This check combines all the functionality provided by 040 * <a href="https://checkstyle.org/config_header.html#RegexpHeader">RegexpHeader</a> 041 * except supplying the regular expression from a file. 042 * </p> 043 * <p> 044 * It differs from them in that it works in multiline mode. Its regular expression 045 * can span multiple lines and it checks this against the whole file at once. 046 * The others work in singleline mode. Their single or multiple regular expressions 047 * can only span one line. They check each of these against each line in the file in turn. 048 * </p> 049 * <p> 050 * <b>Note:</b> Because of the different mode of operation there may be some 051 * changes in the regular expressions used to achieve a particular end. 052 * </p> 053 * <p> 054 * In multiline mode... 055 * </p> 056 * <ul> 057 * <li> 058 * {@code ^} means the beginning of a line, as opposed to beginning of the input. 059 * </li> 060 * <li> 061 * For beginning of the input use {@code \A}. 062 * </li> 063 * <li> 064 * {@code $} means the end of a line, as opposed to the end of the input. 065 * </li> 066 * <li> 067 * For end of input use {@code \Z}. 068 * </li> 069 * <li> 070 * Each line in the file is terminated with a line feed character. 071 * </li> 072 * </ul> 073 * <p> 074 * <b>Note:</b> Not all regular expression engines are created equal. 075 * Some provide extra functions that others do not and some elements 076 * of the syntax may vary. This check makes use of the 077 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html"> 078 * java.util.regex package</a>; please check its documentation for details 079 * of how to construct a regular expression to achieve a particular goal. 080 * </p> 081 * <p> 082 * <b>Note:</b> When entering a regular expression as a parameter in 083 * the XML config file you must also take into account the XML rules. e.g. 
084 * if you want to match a < symbol you need to enter &lt;. 085 * The regular expression should be entered on one line. 086 * </p> 087 * <ul> 088 * <li> 089 * Property {@code format} - Specify the pattern to match against. 090 * Type is {@code java.util.regex.Pattern}. 091 * Default value is {@code "^$"}(empty). 092 * </li> 093 * <li> 094 * Property {@code message} - Specify message which is used to notify about 095 * violations, if empty then the default (hard-coded) message is used. 096 * Type is {@code java.lang.String}. 097 * Default value is {@code null}. 098 * </li> 099 * <li> 100 * Property {@code illegalPattern} - Control whether the pattern is required or illegal. 101 * Type is {@code boolean}. 102 * Default value is {@code false}. 103 * </li> 104 * <li> 105 * Property {@code duplicateLimit} - Control whether to check for duplicates 106 * of a required pattern, any negative value means no checking for duplicates, 107 * any positive value is used as the maximum number of allowed duplicates, 108 * if the limit is exceeded violations will be logged. 109 * Type is {@code int}. 110 * Default value is {@code 0}. 111 * </li> 112 * <li> 113 * Property {@code errorLimit} - Specify the maximum number of violations before 114 * the check will abort. 115 * Type is {@code int}. 116 * Default value is {@code 100}. 117 * </li> 118 * <li> 119 * Property {@code ignoreComments} - Control whether to ignore matches found within comments. 120 * Type is {@code boolean}. 121 * Default value is {@code false}. 122 * </li> 123 * </ul> 124 * <p> 125 * The following examples are mainly copied from the other 3 checks mentioned above, 126 * to show how the same results can be achieved using this check in place of them. 127 * </p> 128 * <p> 129 * <b>To use like Required Regexp check:</b> 130 * </p> 131 * <p> 132 * An example of how to configure the check to make sure a copyright statement 133 * is included in the file: 134 * </p> 135 * <p> 136 * The statement. 
137 * </p> 138 * <pre> 139 * // This code is copyrighted 140 * </pre> 141 * <p> 142 * The check. 143 * </p> 144 * <pre> 145 * <module name="Regexp"> 146 * <property name="format" value="// This code is copyrighted"/> 147 * </module> 148 * </pre> 149 * <p> 150 * Your statement may be multiline. 151 * </p> 152 * <pre> 153 * // This code is copyrighted 154 * // (c) MyCompany 155 * </pre> 156 * <p> 157 * Then the check would be. 158 * </p> 159 * <pre> 160 * <module name="Regexp"> 161 * <property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/> 162 * </module> 163 * </pre> 164 * <p> 165 * <b>Note:</b> To search for parentheses () in a regular expression you must 166 * escape them like \(\). This is required by the regexp engine, otherwise it will 167 * think they are special instruction characters. 168 * </p> 169 * <p> 170 * And to make sure it appears only once: 171 * </p> 172 * <pre> 173 * <module name="Regexp"> 174 * <property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/> 175 * <property name="duplicateLimit" value="0"/> 176 * </module> 177 * </pre> 178 * <p> 179 * It can also be useful to attach a meaningful message to the check: 180 * </p> 181 * <pre> 182 * <module name="Regexp"> 183 * <property name="format" value="// This code is copyrighted\n// \(c\) MyCompany"/> 184 * <property name="message" value="Copyright"/> 185 * </module> 186 * </pre> 187 * <p> 188 * <b>To use like illegal regexp check:</b> 189 * </p> 190 * <p> 191 * An example of how to configure the check to make sure there are no calls to 192 * {@code System.out.println}: 193 * </p> 194 * <pre> 195 * <module name="Regexp"> 196 * <!-- . matches any character, so we need to escape it and use \. to match dots. 
--> 197 * <property name="format" value="System\.out\.println"/> 198 * <property name="illegalPattern" value="true"/> 199 * </module> 200 * </pre> 201 * <p> 202 * You may want to make the above check ignore comments, like this: 203 * </p> 204 * <pre> 205 * <module name="Regexp"> 206 * <property name="format" value="System\.out\.println"/> 207 * <property name="illegalPattern" value="true"/> 208 * <property name="ignoreComments" value="true"/> 209 * </module> 210 * </pre> 211 * <p> 212 * An example of how to configure the check to find trailing whitespace at the end of a line: 213 * </p> 214 * <pre> 215 * <module name="Regexp"> 216 * <property name="format" value="[ \t]+$"/> 217 * <property name="illegalPattern" value="true"/> 218 * <property name="message" value="Trailing whitespace"/> 219 * </module> 220 * </pre> 221 * <p> 222 * An example of how to configure the check to find case-insensitive occurrences of "debug": 223 * </p> 224 * <pre> 225 * <module name="Regexp"> 226 * <property name="format" value="(?i)debug"/> 227 * <property name="illegalPattern" value="true"/> 228 * </module> 229 * </pre> 230 * <p> 231 * <b>Note:</b> The (?i) at the beginning of the regular expression tells the 232 * regexp engine to ignore the case. 233 * </p> 234 * <p> 235 * There is also a feature to limit the number of violations reported. 236 * When the limit is reached the check aborts with a message reporting that 237 * the limit has been reached. The default limit setting is 100, 238 * but this can be changed as shown in the following example. 
239 * </p> 240 * <pre> 241 * <module name="Regexp"> 242 * <property name="format" value="(?i)debug"/> 243 * <property name="illegalPattern" value="true"/> 244 * <property name="errorLimit" value="1000"/> 245 * </module> 246 * </pre> 247 * <p> 248 * <b>To use like <a href="https://checkstyle.org/config_header.html#RegexpHeader"> 249 * RegexpHeader</a>:</b> 250 * </p> 251 * <p> 252 * To configure the check to verify that each file starts with the following multiline header. 253 * </p> 254 * <p> 255 * Note the following: 256 * </p> 257 * <ul> 258 * <li> 259 * \A means the start of the file. 260 * </li> 261 * <li> 262 * The date can be any 4 digit number. 263 * </li> 264 * </ul> 265 * <pre> 266 * // Copyright (C) 2004 MyCompany 267 * // All rights reserved 268 * </pre> 269 * <pre> 270 * <module name="Regexp"> 271 * <property 272 * name="format" 273 * value="\A// Copyright \(C\) \d\d\d\d MyCompany\n// All rights reserved"/> 274 * </module> 275 * </pre> 276 * <p> 277 * A more complex example. Note how the import and javadoc multilines are handled, 278 * there can be any number of them. 279 * </p> 280 * <pre> 281 * /////////////////////////////////////////////////////////////////////// 282 * // checkstyle: 283 * // Checks Java source code for adherence to a set of rules. 
284 * // Copyright (C) 2004 Oliver Burn 285 * // Last modification by $Author A.N.Other$ 286 * /////////////////////////////////////////////////////////////////////// 287 * 288 * package com.puppycrawl.checkstyle; 289 * 290 * import java.util.thing1; 291 * import java.util.thing2; 292 * import java.util.thing3; 293 * 294 * /** 295 * * javadoc line 1 296 * * javadoc line 2 297 * * javadoc line 3 298 * */ 299 * </pre> 300 * <pre> 301 * <module name="Regexp"> 302 * <property 303 * name="format" 304 * value="\A/{71}\n// checkstyle:\n// Checks Java source code for 305 * adherence to a set of rules\.\n// Copyright \(C\) \d\d\d\d Oliver Burn\n 306 * // Last modification by \$Author.*\$\n/{71}\n\npackage [\w\.]*;\n\n 307 * (import [\w\.]*;\n)*\n/\*\*\n( \*[^/]*\n)* \*/"/> 308 * </module> 309 * </pre> 310 * <p> 311 * <b>More examples:</b> 312 * </p> 313 * <p> 314 * The next 2 examples deal with the following example Java source file: 315 * </p> 316 * <pre> 317 * /* 318 * * PID.java 319 * * 320 * * Copyright (c) 2001 ACME 321 * * 123 Some St. 322 * * Somewhere. 323 * * 324 * * This software is the confidential and proprietary information of ACME. 325 * * ("Confidential Information"). You shall not disclose such 326 * * Confidential Information and shall use it only in accordance with 327 * * the terms of the license agreement you entered into with ACME. 
328 * * 329 * * $Log: config_misc.xml,v $ 330 * * Revision 1.7 2007/01/16 12:16:35 oburn 331 * * Removing all reference to mailing lists 332 * * 333 * * Revision 1.6 2005/12/25 16:13:10 o_sukhodolsky 334 * * Fix for rfe 1248106 (TYPECAST is now accepted by NoWhitespaceAfter) 335 * * 336 * * Fix for rfe 953266 (thanks to Paul Guyot (pguyot) for submitting patch) 337 * * IllegalType can be configured to accept some abstract classes which 338 * * matches to regexp of illegal type names (property legalAbstractClassNames) 339 * * 340 * * TrailingComment now can be configured to accept some trailing comments 341 * * (such as NOI18N) (property legalComment, rfe 1385344). 342 * * 343 * * Revision 1.5 2005/11/06 11:54:12 oburn 344 * * Incorporate excellent patch [ 1344344 ] Consolidation of regexp checks. 345 * * 346 * * Revision 1.3.8.1 2005/10/11 14:26:32 someone 347 * * Fix for bug 251. The broken bit is fixed 348 * */ 349 * 350 * package com.acme.tools; 351 * 352 * import com.acme.thing1; 353 * import com.acme.thing2; 354 * import com.acme.thing3; 355 * 356 * /** 357 * * 358 * * <P> 359 * * <I>This software is the confidential and proprietary information of 360 * * ACME (<B>"Confidential Information"</B>). You shall not 361 * * disclose such Confidential Information and shall use it only in 362 * * accordance with the terms of the license agreement you entered into 363 * * with ACME.</I> 364 * * </P> 365 * * 366 * * &#169; copyright 2002 ACME 367 * * 368 * * @author Some Body 369 * */ 370 * public class PID extends StateMachine implements WebObject.Constants { 371 * 372 * /** javadoc. */ 373 * public static final int A_SETPOINT = 1; 374 * . 375 * . 376 * . 377 * } // class PID 378 * </pre> 379 * <p> 380 * This checks for the presence of the header, the first 16 lines. 381 * </p> 382 * <p> 383 * Note the following: 384 * </p> 385 * <ul> 386 * <li> 387 * Line 2 and 13 contain the file name. 
These are checked to make sure they 388 * are the same, and that they match the class name. 389 * </li> 390 * <li> 391 * The date can be any 4 digit number. 392 * </li> 393 * </ul> 394 * <pre> 395 * <module name="Regexp"> 396 * <property 397 * name="format" 398 * value="\A/\*\n \* (\w*)\.java\n \*\n \* Copyright \(c\) 399 * \d\d\d\d ACME\n \* 123 Some St\.\n \* Somewhere\.\n \*\n 400 * \* This software is the confidential and proprietary information 401 * of ACME\.\n \* \(&quot;Confidential Information&quot;\)\. You 402 * shall not disclose such\n \* Confidential Information and shall 403 * use it only in accordance with\n \* the terms of the license 404 * agreement you entered into with ACME\.\n \*\n 405 * \* \$Log: config_misc\.xml,v $ 406 * \* Revision 1\.7 2007/01/16 12:16:35 oburn 407 * \* Removing all reference to mailing lists 408 * \* \ 409 * \* Revision 1.6 2005/12/25 16:13:10 o_sukhodolsky 410 * \* Fix for rfe 1248106 \(TYPECAST is now accepted by NoWhitespaceAfter\) 411 * \* \ 412 * \* Fix for rfe 953266 \(thanks to Paul Guyot \(pguyot\) for submitting patch\) 413 * \* IllegalType can be configured to accept some abstract classes which 414 * \* matches to regexp of illegal type names \(property legalAbstractClassNames\) 415 * \* 416 * \* TrailingComment now can be configured to accept some trailing comments 417 * \* \(such as NOI18N\) \(property legalComment, rfe 1385344\). 418 * \* 419 * \* Revision 1.5 2005/11/06 11:54:12 oburn 420 * \* Incorporate excellent patch \[ 1344344 \] Consolidation of regexp checks. 421 * \* \\n(.*\n)*([\w|\s]*( class | interface )\1)"/> 422 * <property name="message" value="Correct header not found"/> 423 * </module> 424 * </pre> 425 * <p> 426 * This checks for the presence of a copyright notice within the class javadoc, lines 24 to 37. 
427 * </p> 428 * <pre> 429 * <module name="Regexp"> 430 * <property 431 * name="format" 432 * value="(/\*\*\n)( \*.*\n)*( \* <P>\n \* <I> 433 * This software is the confidential and proprietary information of\n 434 * \* ACME \(<B>&quot;Confidential Information&quot;</B> 435 * \)\. You shall not\n \* disclose such Confidential Information 436 * and shall use it only in\n \* accordance with the terms of the 437 * license agreement you entered into\n \* with ACME\.</I>\n 438 * \* </P>\n \*\n \* &#169; copyright \d\d\d\d ACME\n 439 * \*\n \* @author .*)(\n\s\*.*)*/\n[\w|\s]*( class | interface )"/> 440 * <property name="message" 441 * value="Copyright in class/interface Javadoc"/> 442 * <property name="duplicateLimit" value="0"/> 443 * </module> 444 * </pre> 445 * <p> 446 * <b>Note:</b> To search for things that mean something in XML, like < 447 * you need to escape them like &lt;. This is required so the XML parser 448 * does not act on them, but instead passes the correct character to the regexp engine. 449 * </p> 450 * <p> 451 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker} 452 * </p> 453 * <p> 454 * Violation Message Keys: 455 * </p> 456 * <ul> 457 * <li> 458 * {@code duplicate.regexp} 459 * </li> 460 * <li> 461 * {@code illegal.regexp} 462 * </li> 463 * <li> 464 * {@code required.regexp} 465 * </li> 466 * </ul> 467 * 468 * @since 4.0 469 */ 470@FileStatefulCheck 471public class RegexpCheck extends AbstractCheck { 472 473 /** 474 * A key is pointing to the warning message text in "messages.properties" 475 * file. 476 */ 477 public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp"; 478 479 /** 480 * A key is pointing to the warning message text in "messages.properties" 481 * file. 482 */ 483 public static final String MSG_REQUIRED_REGEXP = "required.regexp"; 484 485 /** 486 * A key is pointing to the warning message text in "messages.properties" 487 * file. 
488 */ 489 public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp"; 490 491 /** Default duplicate limit. */ 492 private static final int DEFAULT_DUPLICATE_LIMIT = -1; 493 494 /** Default error report limit. */ 495 private static final int DEFAULT_ERROR_LIMIT = 100; 496 497 /** Error count exceeded message. */ 498 private static final String ERROR_LIMIT_EXCEEDED_MESSAGE = 499 "The error limit has been exceeded, " 500 + "the check is aborting, there may be more unreported errors."; 501 502 /** 503 * Specify message which is used to notify about violations, 504 * if empty then the default (hard-coded) message is used. 505 */ 506 private String message; 507 508 /** Control whether to ignore matches found within comments. */ 509 private boolean ignoreComments; 510 511 /** Control whether the pattern is required or illegal. */ 512 private boolean illegalPattern; 513 514 /** Specify the maximum number of violations before the check will abort. */ 515 private int errorLimit = DEFAULT_ERROR_LIMIT; 516 517 /** 518 * Control whether to check for duplicates of a required pattern, 519 * any negative value means no checking for duplicates, 520 * any positive value is used as the maximum number of allowed duplicates, 521 * if the limit is exceeded violations will be logged. 522 */ 523 private int duplicateLimit; 524 525 /** Boolean to say if we should check for duplicates. */ 526 private boolean checkForDuplicates; 527 528 /** Tracks number of matches made. */ 529 private int matchCount; 530 531 /** Tracks number of errors. */ 532 private int errorCount; 533 534 /** Specify the pattern to match against. */ 535 private Pattern format = Pattern.compile("^$", Pattern.MULTILINE); 536 537 /** The matcher. */ 538 private Matcher matcher; 539 540 /** 541 * Setter to specify message which is used to notify about violations, 542 * if empty then the default (hard-coded) message is used. 543 * 544 * @param message custom message which should be used in report. 
545 */ 546 public void setMessage(String message) { 547 this.message = message; 548 } 549 550 /** 551 * Setter to control whether to ignore matches found within comments. 552 * 553 * @param ignoreComments True if comments should be ignored. 554 */ 555 public void setIgnoreComments(boolean ignoreComments) { 556 this.ignoreComments = ignoreComments; 557 } 558 559 /** 560 * Setter to control whether the pattern is required or illegal. 561 * 562 * @param illegalPattern True if pattern is not allowed. 563 */ 564 public void setIllegalPattern(boolean illegalPattern) { 565 this.illegalPattern = illegalPattern; 566 } 567 568 /** 569 * Setter to specify the maximum number of violations before the check will abort. 570 * 571 * @param errorLimit the number of errors to report. 572 */ 573 public void setErrorLimit(int errorLimit) { 574 this.errorLimit = errorLimit; 575 } 576 577 /** 578 * Setter to control whether to check for duplicates of a required pattern, 579 * any negative value means no checking for duplicates, 580 * any positive value is used as the maximum number of allowed duplicates, 581 * if the limit is exceeded violations will be logged. 582 * 583 * @param duplicateLimit negative values mean no duplicate checking, 584 * any positive value is used as the limit. 585 */ 586 public void setDuplicateLimit(int duplicateLimit) { 587 this.duplicateLimit = duplicateLimit; 588 checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT; 589 } 590 591 /** 592 * Setter to specify the pattern to match against. 
593 * 594 * @param pattern the new pattern 595 */ 596 public final void setFormat(Pattern pattern) { 597 format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE); 598 } 599 600 @Override 601 public int[] getDefaultTokens() { 602 return getRequiredTokens(); 603 } 604 605 @Override 606 public int[] getAcceptableTokens() { 607 return getRequiredTokens(); 608 } 609 610 @Override 611 public int[] getRequiredTokens() { 612 return CommonUtil.EMPTY_INT_ARRAY; 613 } 614 615 @Override 616 public void beginTree(DetailAST rootAST) { 617 matcher = format.matcher(getFileContents().getText().getFullText()); 618 matchCount = 0; 619 errorCount = 0; 620 findMatch(); 621 } 622 623 /** Recursive method that finds the matches. */ 624 private void findMatch() { 625 final boolean foundMatch = matcher.find(); 626 if (foundMatch) { 627 final FileText text = getFileContents().getText(); 628 final LineColumn start = text.lineColumn(matcher.start()); 629 final int startLine = start.getLine(); 630 631 final boolean ignore = isIgnore(startLine, text, start); 632 633 if (!ignore) { 634 matchCount++; 635 if (illegalPattern || checkForDuplicates 636 && matchCount - 1 > duplicateLimit) { 637 errorCount++; 638 logMessage(startLine); 639 } 640 } 641 if (canContinueValidation(ignore)) { 642 findMatch(); 643 } 644 } 645 else if (!illegalPattern && matchCount == 0) { 646 logMessage(0); 647 } 648 } 649 650 /** 651 * Check if we can stop validation. 652 * 653 * @param ignore flag 654 * @return true is we can continue 655 */ 656 private boolean canContinueValidation(boolean ignore) { 657 return errorCount <= errorLimit - 1 658 && (ignore || illegalPattern || checkForDuplicates); 659 } 660 661 /** 662 * Detect ignore situation. 
663 * 664 * @param startLine position of line 665 * @param text file text 666 * @param start line column 667 * @return true is that need to be ignored 668 */ 669 private boolean isIgnore(int startLine, FileText text, LineColumn start) { 670 final LineColumn end; 671 if (matcher.end() == 0) { 672 end = text.lineColumn(0); 673 } 674 else { 675 end = text.lineColumn(matcher.end() - 1); 676 } 677 boolean ignore = false; 678 if (ignoreComments) { 679 final FileContents theFileContents = getFileContents(); 680 final int startColumn = start.getColumn(); 681 final int endLine = end.getLine(); 682 final int endColumn = end.getColumn(); 683 ignore = theFileContents.hasIntersectionWithComment(startLine, 684 startColumn, endLine, endColumn); 685 } 686 return ignore; 687 } 688 689 /** 690 * Displays the right message. 691 * 692 * @param lineNumber the line number the message relates to. 693 */ 694 private void logMessage(int lineNumber) { 695 String msg; 696 697 if (message == null || message.isEmpty()) { 698 msg = format.pattern(); 699 } 700 else { 701 msg = message; 702 } 703 704 if (errorCount >= errorLimit) { 705 msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg; 706 } 707 708 if (illegalPattern) { 709 log(lineNumber, MSG_ILLEGAL_REGEXP, msg); 710 } 711 else { 712 if (lineNumber > 0) { 713 log(lineNumber, MSG_DUPLICATE_REGEXP, msg); 714 } 715 else { 716 log(lineNumber, MSG_REQUIRED_REGEXP, msg); 717 } 718 } 719 } 720 721}