View Javadoc
1   package org.codehaus.plexus.util;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.util.ArrayList;
21  import java.util.Comparator;
22  import java.util.List;
23  
24  /**
25   * Scan a directory tree for files, with specified inclusions and exclusions.
26   */
27  public abstract class AbstractScanner implements Scanner {
28      /**
29       * Patterns which should be excluded by default, like SCM files
30       * <ul>
31       * <li>Misc: &#42;&#42;/&#42;~, &#42;&#42;/#&#42;#, &#42;&#42;/.#&#42;, &#42;&#42;/%&#42;%, &#42;&#42;/._&#42;</li>
32       * <li>CVS: &#42;&#42;/CVS, &#42;&#42;/CVS/&#42;&#42;</li>
33       * <li>RCS: &#42;&#42;/RCS, &#42;&#42;/RCS/&#42;&#42;</li>
34       * <li>SCCS: &#42;&#42;/SCCS, &#42;&#42;/SCCS/&#42;&#42;</li>
35       * <li>VSSercer: &#42;&#42;/vssver.scc</li>
36       * <li>MKS: &#42;&#42;/project.pj</li>
37       * <li>SVN: &#42;&#42;/.svn, &#42;&#42;/.svn/&#42;&#42;</li>
38       * <li>GNU: &#42;&#42;/.arch-ids, &#42;&#42;/.arch-ids/&#42;&#42;</li>
39       * <li>Bazaar: &#42;&#42;/.bzr, &#42;&#42;/.bzr/&#42;&#42;</li>
40       * <li>SurroundSCM: &#42;&#42;/.MySCMServerInfo</li>
41       * <li>Mac: &#42;&#42;/.DS_Store</li>
42       * <li>Serena Dimension: &#42;&#42;/.metadata, &#42;&#42;/.metadata/&#42;&#42;</li>
43       * <li>Mercurial: &#42;&#42;/.hg, &#42;&#42;/.hg/&#42;&#42;</li>
44       * <li>Git: &#42;&#42;/.git, &#42;&#42;/.git/&#42;&#42;</li>
45       * <li>Bitkeeper: &#42;&#42;/BitKeeper, &#42;&#42;/BitKeeper/&#42;&#42;, &#42;&#42;/ChangeSet,
46       * &#42;&#42;/ChangeSet/&#42;&#42;</li>
47       * <li>Darcs: &#42;&#42;/_darcs, &#42;&#42;/_darcs/&#42;&#42;, &#42;&#42;/.darcsrepo,
48       * &#42;&#42;/.darcsrepo/&#42;&#42;&#42;&#42;/-darcs-backup&#42;, &#42;&#42;/.darcs-temp-mail
49       * </ul>
50       *
51       * @see #addDefaultExcludes()
52       */
53      public static final String[] DEFAULTEXCLUDES = {
54          // Miscellaneous typical temporary files
55          "**/*~",
56          "**/#*#",
57          "**/.#*",
58          "**/%*%",
59          "**/._*",
60  
61          // CVS
62          "**/CVS",
63          "**/CVS/**",
64  
65          // RCS
66          "**/RCS",
67          "**/RCS/**",
68  
69          // SCCS
70          "**/SCCS",
71          "**/SCCS/**",
72  
73          // Visual SourceSafe
74          "**/vssver.scc",
75  
76          // MKS
77          "**/project.pj",
78  
79          // Subversion
80          "**/.svn",
81          "**/.svn/**",
82  
83          // Arch
84          "**/.arch-ids",
85          "**/.arch-ids/**",
86  
87          // Bazaar
88          "**/.bzr",
89          "**/.bzr/**",
90  
91          // SurroundSCM
92          "**/.MySCMServerInfo",
93  
94          // Mac
95          "**/.DS_Store",
96  
97          // Serena Dimensions Version 10
98          "**/.metadata",
99          "**/.metadata/**",
100 
101         // Mercurial
102         "**/.hg",
103         "**/.hg/**",
104 
105         // git
106         "**/.git",
107         "**/.git/**",
108 
109         // BitKeeper
110         "**/BitKeeper",
111         "**/BitKeeper/**",
112         "**/ChangeSet",
113         "**/ChangeSet/**",
114 
115         // darcs
116         "**/_darcs",
117         "**/_darcs/**",
118         "**/.darcsrepo",
119         "**/.darcsrepo/**",
120         "**/-darcs-backup*",
121         "**/.darcs-temp-mail"
122     };
123 
124     /**
125      * The patterns for the files to be included.
126      */
127     protected String[] includes;
128 
129     private MatchPatterns includesPatterns;
130 
131     /**
132      * The patterns for the files to be excluded.
133      */
134     protected String[] excludes;
135 
136     private MatchPatterns excludesPatterns;
137 
138     /**
139      * Whether or not the file system should be treated as a case sensitive one.
140      */
141     protected boolean isCaseSensitive = true;
142 
143     /**
144      * @since 3.3.0
145      */
146     protected Comparator<String> filenameComparator;
147 
148     /**
149      * Sets whether or not the file system should be regarded as case sensitive.
150      *
151      * @param isCaseSensitive whether or not the file system should be regarded as a case sensitive one
152      */
153     public void setCaseSensitive(boolean isCaseSensitive) {
154         this.isCaseSensitive = isCaseSensitive;
155     }
156 
157     /**
158      * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p>
159      *
160      * <p>This is not a general purpose test and should only be used if you can live with false positives. For example,
161      * <code>pattern=**\a</code> and <code>str=b</code> will yield <code>true</code>.</p>
162      *
163      * @param pattern The pattern to match against. Must not be <code>null</code>.
164      * @param str The path to match, as a String. Must not be <code>null</code>.
165      * @return whether or not a given path matches the start of a given pattern up to the first "**".
166      */
167     protected static boolean matchPatternStart(String pattern, String str) {
168         return SelectorUtils.matchPatternStart(pattern, str);
169     }
170 
171     /**
172      * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p>
173      *
174      * <p>This is not a general purpose test and should only be used if you can live with false positives. For example,
175      * <code>pattern=**\a</code> and <code>str=b</code> will yield <code>true</code>.</p>
176      *
177      * @param pattern The pattern to match against. Must not be <code>null</code>.
178      * @param str The path to match, as a String. Must not be <code>null</code>.
179      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
180      * @return whether or not a given path matches the start of a given pattern up to the first "**".
181      */
182     protected static boolean matchPatternStart(String pattern, String str, boolean isCaseSensitive) {
183         return SelectorUtils.matchPatternStart(pattern, str, isCaseSensitive);
184     }
185 
186     /**
187      * Tests whether or not a given path matches a given pattern.
188      *
189      * @param pattern The pattern to match against. Must not be <code>null</code>.
190      * @param str The path to match, as a String. Must not be <code>null</code>.
191      * @return <code>true</code> if the pattern matches against the string, or <code>false</code> otherwise.
192      */
193     protected static boolean matchPath(String pattern, String str) {
194         return SelectorUtils.matchPath(pattern, str);
195     }
196 
197     /**
198      * Tests whether or not a given path matches a given pattern.
199      *
200      * @param pattern The pattern to match against. Must not be <code>null</code>.
201      * @param str The path to match, as a String. Must not be <code>null</code>.
202      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
203      * @return <code>true</code> if the pattern matches against the string, or <code>false</code> otherwise.
204      */
205     protected static boolean matchPath(String pattern, String str, boolean isCaseSensitive) {
206         return SelectorUtils.matchPath(pattern, str, isCaseSensitive);
207     }
208 
209     /**
210      * Tests whether or not a string matches against a pattern. The pattern may contain two special characters:<br>
211      * '*' means zero or more characters<br>
212      * '?' means one and only one character
213      *
214      * @param pattern The pattern to match against. Must not be <code>null</code>.
215      * @param str The string which must be matched against the pattern. Must not be <code>null</code>.
216      * @return <code>true</code> if the string matches against the pattern, or <code>false</code> otherwise.
217      */
218     public static boolean match(String pattern, String str) {
219         return SelectorUtils.match(pattern, str);
220     }
221 
222     /**
223      * Tests whether or not a string matches against a pattern. The pattern may contain two special characters:<br>
224      * '*' means zero or more characters<br>
225      * '?' means one and only one character
226      *
227      * @param pattern The pattern to match against. Must not be <code>null</code>.
228      * @param str The string which must be matched against the pattern. Must not be <code>null</code>.
229      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
230      * @return <code>true</code> if the string matches against the pattern, or <code>false</code> otherwise.
231      */
232     protected static boolean match(String pattern, String str, boolean isCaseSensitive) {
233         return SelectorUtils.match(pattern, str, isCaseSensitive);
234     }
235 
236     /**
237      * <p>Sets the list of include patterns to use. All '/' and '\' characters are replaced by
238      * <code>File.separatorChar</code>, so the separator used need not match <code>File.separatorChar</code>.</p>
239      *
240      * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
241      *
242      * @param includes A list of include patterns. May be <code>null</code>, indicating that all files should be
243      *            included. If a non-<code>null</code> list is given, all elements must be non-<code>null</code>.
244      */
245     @Override
246     public void setIncludes(String[] includes) {
247         if (includes == null) {
248             this.includes = null;
249         } else {
250             final List<String> list = new ArrayList<String>(includes.length);
251             for (String include : includes) {
252                 if (include != null) {
253                     list.add(normalizePattern(include));
254                 }
255             }
256             this.includes = list.toArray(new String[0]);
257         }
258     }
259 
260     /**
261      * <p>Sets the list of exclude patterns to use. All '/' and '\' characters are replaced by
262      * <code>File.separatorChar</code>, so the separator used need not match <code>File.separatorChar</code>.</p>
263      *
264      * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
265      *
266      * @param excludes A list of exclude patterns. May be <code>null</code>, indicating that no files should be
267      *            excluded. If a non-<code>null</code> list is given, all elements must be non-<code>null</code>.
268      */
269     @Override
270     public void setExcludes(String[] excludes) {
271         if (excludes == null) {
272             this.excludes = null;
273         } else {
274             final List<String> list = new ArrayList<String>(excludes.length);
275             for (String exclude : excludes) {
276                 if (exclude != null) {
277                     list.add(normalizePattern(exclude));
278                 }
279             }
280             this.excludes = list.toArray(new String[0]);
281         }
282     }
283 
284     /**
285      * Normalizes the pattern, e.g. converts forward and backward slashes to the platform-specific file separator.
286      *
287      * @param pattern The pattern to normalize, must not be <code>null</code>.
288      * @return The normalized pattern, never <code>null</code>.
289      */
290     private String normalizePattern(String pattern) {
291         pattern = pattern.trim();
292 
293         if (pattern.startsWith(SelectorUtils.REGEX_HANDLER_PREFIX)) {
294             if (File.separatorChar == '\\') {
295                 pattern = StringUtils.replace(pattern, "/", "\\\\");
296             } else {
297                 pattern = StringUtils.replace(pattern, "\\\\", "/");
298             }
299         } else {
300             pattern = pattern.replace(File.separatorChar == '/' ? '\\' : '/', File.separatorChar);
301 
302             if (pattern.endsWith(File.separator)) {
303                 pattern += "**";
304             }
305         }
306 
307         return pattern;
308     }
309 
310     /**
311      * Tests whether or not a name matches against at least one include pattern.
312      *
313      * @param name The name to match. Must not be <code>null</code>.
314      * @return <code>true</code> when the name matches against at least one include pattern, or <code>false</code>
315      *         otherwise.
316      */
317     protected boolean isIncluded(String name) {
318         return includesPatterns.matches(name, isCaseSensitive);
319     }
320 
321     protected boolean isIncluded(String name, String[] tokenizedName) {
322         return includesPatterns.matches(name, tokenizedName, isCaseSensitive);
323     }
324 
325     protected boolean isIncluded(String name, char[][] tokenizedName) {
326         return includesPatterns.matches(name, tokenizedName, isCaseSensitive);
327     }
328 
329     /**
330      * Tests whether or not a name matches the start of at least one include pattern.
331      *
332      * @param name The name to match. Must not be <code>null</code>.
333      * @return <code>true</code> when the name matches against the start of at least one include pattern, or
334      *         <code>false</code> otherwise.
335      */
336     protected boolean couldHoldIncluded(String name) {
337         return includesPatterns.matchesPatternStart(name, isCaseSensitive);
338     }
339 
340     /**
341      * Tests whether or not a name matches against at least one exclude pattern.
342      *
343      * @param name The name to match. Must not be <code>null</code>.
344      * @return <code>true</code> when the name matches against at least one exclude pattern, or <code>false</code>
345      *         otherwise.
346      */
347     protected boolean isExcluded(String name) {
348         return excludesPatterns.matches(name, isCaseSensitive);
349     }
350 
351     protected boolean isExcluded(String name, String[] tokenizedName) {
352         return excludesPatterns.matches(name, tokenizedName, isCaseSensitive);
353     }
354 
355     protected boolean isExcluded(String name, char[][] tokenizedName) {
356         return excludesPatterns.matches(name, tokenizedName, isCaseSensitive);
357     }
358 
359     /**
360      * Adds default exclusions to the current exclusions set.
361      */
362     @Override
363     public void addDefaultExcludes() {
364         int excludesLength = excludes == null ? 0 : excludes.length;
365         String[] newExcludes;
366         newExcludes = new String[excludesLength + DEFAULTEXCLUDES.length];
367         if (excludesLength > 0) {
368             System.arraycopy(excludes, 0, newExcludes, 0, excludesLength);
369         }
370         for (int i = 0; i < DEFAULTEXCLUDES.length; i++) {
371             newExcludes[i + excludesLength] = DEFAULTEXCLUDES[i].replace('/', File.separatorChar);
372         }
373         excludes = newExcludes;
374     }
375 
376     protected void setupDefaultFilters() {
377         if (includes == null) {
378             // No includes supplied, so set it to 'matches all'
379             includes = new String[1];
380             includes[0] = "**";
381         }
382         if (excludes == null) {
383             excludes = new String[0];
384         }
385     }
386 
387     protected void setupMatchPatterns() {
388         includesPatterns = MatchPatterns.from(includes);
389         excludesPatterns = MatchPatterns.from(excludes);
390     }
391 
392     @Override
393     public void setFilenameComparator(Comparator<String> filenameComparator) {
394         this.filenameComparator = filenameComparator;
395     }
396 }