View Javadoc
1   package org.codehaus.plexus.util;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.util.ArrayList;
21  import java.util.Comparator;
22  import java.util.List;
23  
24  /**
25   * Scan a directory tree for files, with specified inclusions and exclusions.
26   */
27  public abstract class AbstractScanner implements Scanner {
28      /**
29       * Patterns which should be excluded by default, like SCM files
30       * <ul>
31       * <li>Misc: &#42;&#42;/&#42;~, &#42;&#42;/#&#42;#, &#42;&#42;/.#&#42;, &#42;&#42;/%&#42;%, &#42;&#42;/._&#42;</li>
32       * <li>CVS: &#42;&#42;/CVS, &#42;&#42;/CVS/&#42;&#42;, &#42;&#42;/.cvsignore</li>
33       * <li>RCS: &#42;&#42;/RCS, &#42;&#42;/RCS/&#42;&#42;</li>
34       * <li>SCCS: &#42;&#42;/SCCS, &#42;&#42;/SCCS/&#42;&#42;</li>
35       * <li>VSSercer: &#42;&#42;/vssver.scc</li>
36       * <li>MKS: &#42;&#42;/project.pj</li>
37       * <li>SVN: &#42;&#42;/.svn, &#42;&#42;/.svn/&#42;&#42;</li>
38       * <li>GNU: &#42;&#42;/.arch-ids, &#42;&#42;/.arch-ids/&#42;&#42;</li>
39       * <li>Bazaar: &#42;&#42;/.bzr, &#42;&#42;/.bzr/&#42;&#42;</li>
40       * <li>SurroundSCM: &#42;&#42;/.MySCMServerInfo</li>
41       * <li>Mac: &#42;&#42;/.DS_Store</li>
42       * <li>Serena Dimension: &#42;&#42;/.metadata, &#42;&#42;/.metadata/&#42;&#42;</li>
43       * <li>Mercurial: &#42;&#42;/.hg, &#42;&#42;/.hg/&#42;&#42;</li>
44       * <li>Git: &#42;&#42;/.git, &#42;&#42;/.git/&#42;&#42;, &#42;&#42;/.gitignore</li>
45       * <li>Bitkeeper: &#42;&#42;/BitKeeper, &#42;&#42;/BitKeeper/&#42;&#42;, &#42;&#42;/ChangeSet,
46       * &#42;&#42;/ChangeSet/&#42;&#42;</li>
47       * <li>Darcs: &#42;&#42;/_darcs, &#42;&#42;/_darcs/&#42;&#42;, &#42;&#42;/.darcsrepo,
48       * &#42;&#42;/.darcsrepo/&#42;&#42;&#42;&#42;/-darcs-backup&#42;, &#42;&#42;/.darcs-temp-mail
49       * </ul>
50       *
51       * @see #addDefaultExcludes()
52       */
53      public static final String[] DEFAULTEXCLUDES = {
54          // Miscellaneous typical temporary files
55          "**/*~",
56          "**/#*#",
57          "**/.#*",
58          "**/%*%",
59          "**/._*",
60  
61          // CVS
62          "**/CVS",
63          "**/CVS/**",
64          "**/.cvsignore",
65  
66          // RCS
67          "**/RCS",
68          "**/RCS/**",
69  
70          // SCCS
71          "**/SCCS",
72          "**/SCCS/**",
73  
74          // Visual SourceSafe
75          "**/vssver.scc",
76  
77          // MKS
78          "**/project.pj",
79  
80          // Subversion
81          "**/.svn",
82          "**/.svn/**",
83  
84          // Arch
85          "**/.arch-ids",
86          "**/.arch-ids/**",
87  
88          // Bazaar
89          "**/.bzr",
90          "**/.bzr/**",
91  
92          // SurroundSCM
93          "**/.MySCMServerInfo",
94  
95          // Mac
96          "**/.DS_Store",
97  
98          // Serena Dimensions Version 10
99          "**/.metadata",
100         "**/.metadata/**",
101 
102         // Mercurial
103         "**/.hg",
104         "**/.hg/**",
105 
106         // git
107         "**/.git",
108         "**/.git/**",
109         "**/.gitignore",
110 
111         // BitKeeper
112         "**/BitKeeper",
113         "**/BitKeeper/**",
114         "**/ChangeSet",
115         "**/ChangeSet/**",
116 
117         // darcs
118         "**/_darcs",
119         "**/_darcs/**",
120         "**/.darcsrepo",
121         "**/.darcsrepo/**",
122         "**/-darcs-backup*",
123         "**/.darcs-temp-mail"
124     };
125 
126     /**
127      * The patterns for the files to be included.
128      */
129     protected String[] includes;
130 
131     private MatchPatterns includesPatterns;
132 
133     /**
134      * The patterns for the files to be excluded.
135      */
136     protected String[] excludes;
137 
138     private MatchPatterns excludesPatterns;
139 
140     /**
141      * Whether or not the file system should be treated as a case sensitive one.
142      */
143     protected boolean isCaseSensitive = true;
144 
145     /**
146      * @since 3.3.0
147      */
148     protected Comparator<String> filenameComparator;
149 
150     /**
151      * Sets whether or not the file system should be regarded as case sensitive.
152      *
153      * @param isCaseSensitive whether or not the file system should be regarded as a case sensitive one
154      */
155     public void setCaseSensitive(boolean isCaseSensitive) {
156         this.isCaseSensitive = isCaseSensitive;
157     }
158 
159     /**
160      * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p>
161      *
162      * <p>This is not a general purpose test and should only be used if you can live with false positives. For example,
163      * <code>pattern=**\a</code> and <code>str=b</code> will yield <code>true</code>.</p>
164      *
165      * @param pattern The pattern to match against. Must not be <code>null</code>.
166      * @param str The path to match, as a String. Must not be <code>null</code>.
167      * @return whether or not a given path matches the start of a given pattern up to the first "**".
168      */
169     protected static boolean matchPatternStart(String pattern, String str) {
170         return SelectorUtils.matchPatternStart(pattern, str);
171     }
172 
173     /**
174      * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p>
175      *
176      * <p>This is not a general purpose test and should only be used if you can live with false positives. For example,
177      * <code>pattern=**\a</code> and <code>str=b</code> will yield <code>true</code>.</p>
178      *
179      * @param pattern The pattern to match against. Must not be <code>null</code>.
180      * @param str The path to match, as a String. Must not be <code>null</code>.
181      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
182      * @return whether or not a given path matches the start of a given pattern up to the first "**".
183      */
184     protected static boolean matchPatternStart(String pattern, String str, boolean isCaseSensitive) {
185         return SelectorUtils.matchPatternStart(pattern, str, isCaseSensitive);
186     }
187 
188     /**
189      * Tests whether or not a given path matches a given pattern.
190      *
191      * @param pattern The pattern to match against. Must not be <code>null</code>.
192      * @param str The path to match, as a String. Must not be <code>null</code>.
193      * @return <code>true</code> if the pattern matches against the string, or <code>false</code> otherwise.
194      */
195     protected static boolean matchPath(String pattern, String str) {
196         return SelectorUtils.matchPath(pattern, str);
197     }
198 
199     /**
200      * Tests whether or not a given path matches a given pattern.
201      *
202      * @param pattern The pattern to match against. Must not be <code>null</code>.
203      * @param str The path to match, as a String. Must not be <code>null</code>.
204      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
205      * @return <code>true</code> if the pattern matches against the string, or <code>false</code> otherwise.
206      */
207     protected static boolean matchPath(String pattern, String str, boolean isCaseSensitive) {
208         return SelectorUtils.matchPath(pattern, str, isCaseSensitive);
209     }
210 
211     /**
212      * Tests whether or not a string matches against a pattern. The pattern may contain two special characters:<br>
213      * '*' means zero or more characters<br>
214      * '?' means one and only one character
215      *
216      * @param pattern The pattern to match against. Must not be <code>null</code>.
217      * @param str The string which must be matched against the pattern. Must not be <code>null</code>.
218      * @return <code>true</code> if the string matches against the pattern, or <code>false</code> otherwise.
219      */
220     public static boolean match(String pattern, String str) {
221         return SelectorUtils.match(pattern, str);
222     }
223 
224     /**
225      * Tests whether or not a string matches against a pattern. The pattern may contain two special characters:<br>
226      * '*' means zero or more characters<br>
227      * '?' means one and only one character
228      *
229      * @param pattern The pattern to match against. Must not be <code>null</code>.
230      * @param str The string which must be matched against the pattern. Must not be <code>null</code>.
231      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
232      * @return <code>true</code> if the string matches against the pattern, or <code>false</code> otherwise.
233      */
234     protected static boolean match(String pattern, String str, boolean isCaseSensitive) {
235         return SelectorUtils.match(pattern, str, isCaseSensitive);
236     }
237 
238     /**
239      * <p>Sets the list of include patterns to use. All '/' and '\' characters are replaced by
240      * <code>File.separatorChar</code>, so the separator used need not match <code>File.separatorChar</code>.</p>
241      *
242      * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
243      *
244      * @param includes A list of include patterns. May be <code>null</code>, indicating that all files should be
245      *            included. If a non-<code>null</code> list is given, all elements must be non-<code>null</code>.
246      */
247     @Override
248     public void setIncludes(String[] includes) {
249         if (includes == null) {
250             this.includes = null;
251         } else {
252             final List<String> list = new ArrayList<String>(includes.length);
253             for (String include : includes) {
254                 if (include != null) {
255                     list.add(normalizePattern(include));
256                 }
257             }
258             this.includes = list.toArray(new String[0]);
259         }
260     }
261 
262     /**
263      * <p>Sets the list of exclude patterns to use. All '/' and '\' characters are replaced by
264      * <code>File.separatorChar</code>, so the separator used need not match <code>File.separatorChar</code>.</p>
265      *
266      * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
267      *
268      * @param excludes A list of exclude patterns. May be <code>null</code>, indicating that no files should be
269      *            excluded. If a non-<code>null</code> list is given, all elements must be non-<code>null</code>.
270      */
271     @Override
272     public void setExcludes(String[] excludes) {
273         if (excludes == null) {
274             this.excludes = null;
275         } else {
276             final List<String> list = new ArrayList<String>(excludes.length);
277             for (String exclude : excludes) {
278                 if (exclude != null) {
279                     list.add(normalizePattern(exclude));
280                 }
281             }
282             this.excludes = list.toArray(new String[0]);
283         }
284     }
285 
286     /**
287      * Normalizes the pattern, e.g. converts forward and backward slashes to the platform-specific file separator.
288      *
289      * @param pattern The pattern to normalize, must not be <code>null</code>.
290      * @return The normalized pattern, never <code>null</code>.
291      */
292     private String normalizePattern(String pattern) {
293         pattern = pattern.trim();
294 
295         if (pattern.startsWith(SelectorUtils.REGEX_HANDLER_PREFIX)) {
296             if (File.separatorChar == '\\') {
297                 pattern = StringUtils.replace(pattern, "/", "\\\\");
298             } else {
299                 pattern = StringUtils.replace(pattern, "\\\\", "/");
300             }
301         } else {
302             pattern = pattern.replace(File.separatorChar == '/' ? '\\' : '/', File.separatorChar);
303 
304             if (pattern.endsWith(File.separator)) {
305                 pattern += "**";
306             }
307         }
308 
309         return pattern;
310     }
311 
312     /**
313      * Tests whether or not a name matches against at least one include pattern.
314      *
315      * @param name The name to match. Must not be <code>null</code>.
316      * @return <code>true</code> when the name matches against at least one include pattern, or <code>false</code>
317      *         otherwise.
318      */
319     protected boolean isIncluded(String name) {
320         return includesPatterns.matches(name, isCaseSensitive);
321     }
322 
323     protected boolean isIncluded(String name, String[] tokenizedName) {
324         return includesPatterns.matches(name, tokenizedName, isCaseSensitive);
325     }
326 
327     protected boolean isIncluded(String name, char[][] tokenizedName) {
328         return includesPatterns.matches(name, tokenizedName, isCaseSensitive);
329     }
330 
331     /**
332      * Tests whether or not a name matches the start of at least one include pattern.
333      *
334      * @param name The name to match. Must not be <code>null</code>.
335      * @return <code>true</code> when the name matches against the start of at least one include pattern, or
336      *         <code>false</code> otherwise.
337      */
338     protected boolean couldHoldIncluded(String name) {
339         return includesPatterns.matchesPatternStart(name, isCaseSensitive);
340     }
341 
342     /**
343      * Tests whether or not a name matches against at least one exclude pattern.
344      *
345      * @param name The name to match. Must not be <code>null</code>.
346      * @return <code>true</code> when the name matches against at least one exclude pattern, or <code>false</code>
347      *         otherwise.
348      */
349     protected boolean isExcluded(String name) {
350         return excludesPatterns.matches(name, isCaseSensitive);
351     }
352 
353     protected boolean isExcluded(String name, String[] tokenizedName) {
354         return excludesPatterns.matches(name, tokenizedName, isCaseSensitive);
355     }
356 
357     protected boolean isExcluded(String name, char[][] tokenizedName) {
358         return excludesPatterns.matches(name, tokenizedName, isCaseSensitive);
359     }
360 
361     /**
362      * Adds default exclusions to the current exclusions set.
363      */
364     @Override
365     public void addDefaultExcludes() {
366         int excludesLength = excludes == null ? 0 : excludes.length;
367         String[] newExcludes;
368         newExcludes = new String[excludesLength + DEFAULTEXCLUDES.length];
369         if (excludesLength > 0) {
370             System.arraycopy(excludes, 0, newExcludes, 0, excludesLength);
371         }
372         for (int i = 0; i < DEFAULTEXCLUDES.length; i++) {
373             newExcludes[i + excludesLength] = DEFAULTEXCLUDES[i].replace('/', File.separatorChar);
374         }
375         excludes = newExcludes;
376     }
377 
378     protected void setupDefaultFilters() {
379         if (includes == null) {
380             // No includes supplied, so set it to 'matches all'
381             includes = new String[1];
382             includes[0] = "**";
383         }
384         if (excludes == null) {
385             excludes = new String[0];
386         }
387     }
388 
389     protected void setupMatchPatterns() {
390         includesPatterns = MatchPatterns.from(includes);
391         excludesPatterns = MatchPatterns.from(excludes);
392     }
393 
394     @Override
395     public void setFilenameComparator(Comparator<String> filenameComparator) {
396         this.filenameComparator = filenameComparator;
397     }
398 }