View Javadoc
1   package org.codehaus.plexus.util;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.util.ArrayList;
21  import java.util.List;
22  
23  /**
24   * Scan a directory tree for files, with specified inclusions and exclusions.
25   */
26  public abstract class AbstractScanner
27      implements Scanner
28  {
29      /**
30       * Patterns which should be excluded by default, like SCM files
31       * <ul>
32       * <li>Misc: &#42;&#42;/&#42;~, &#42;&#42;/#&#42;#, &#42;&#42;/.#&#42;, &#42;&#42;/%&#42;%, &#42;&#42;/._&#42;</li>
33       * <li>CVS: &#42;&#42;/CVS, &#42;&#42;/CVS/&#42;&#42;, &#42;&#42;/.cvsignore</li>
34       * <li>RCS: &#42;&#42;/RCS, &#42;&#42;/RCS/&#42;&#42;</li>
35       * <li>SCCS: &#42;&#42;/SCCS, &#42;&#42;/SCCS/&#42;&#42;</li>
36       * <li>VSSercer: &#42;&#42;/vssver.scc</li>
37       * <li>MKS: &#42;&#42;/project.pj</li>
38       * <li>SVN: &#42;&#42;/.svn, &#42;&#42;/.svn/&#42;&#42;</li>
39       * <li>GNU: &#42;&#42;/.arch-ids, &#42;&#42;/.arch-ids/&#42;&#42;</li>
40       * <li>Bazaar: &#42;&#42;/.bzr, &#42;&#42;/.bzr/&#42;&#42;</li>
41       * <li>SurroundSCM: &#42;&#42;/.MySCMServerInfo</li>
42       * <li>Mac: &#42;&#42;/.DS_Store</li>
43       * <li>Serena Dimension: &#42;&#42;/.metadata, &#42;&#42;/.metadata/&#42;&#42;</li>
44       * <li>Mercurial: &#42;&#42;/.hg, &#42;&#42;/.hg/&#42;&#42;, &#42;&#42;/.hgignore</li>
45       * <li>GIT: &#42;&#42;/.git, &#42;&#42;/.gitignore, &#42;&#42;/.gitattributes, &#42;&#42;/.git/&#42;&#42;</li>
46       * <li>Bitkeeper: &#42;&#42;/BitKeeper, &#42;&#42;/BitKeeper/&#42;&#42;, &#42;&#42;/ChangeSet,
47       * &#42;&#42;/ChangeSet/&#42;&#42;</li>
48       * <li>Darcs: &#42;&#42;/_darcs, &#42;&#42;/_darcs/&#42;&#42;, &#42;&#42;/.darcsrepo,
49       * &#42;&#42;/.darcsrepo/&#42;&#42;&#42;&#42;/-darcs-backup&#42;, &#42;&#42;/.darcs-temp-mail
50       * </ul>
51       *
52       * @see #addDefaultExcludes()
53       */
54      public static final String[] DEFAULTEXCLUDES = {
55          // Miscellaneous typical temporary files
56          "**/*~", "**/#*#", "**/.#*", "**/%*%", "**/._*",
57  
58          // CVS
59          "**/CVS", "**/CVS/**", "**/.cvsignore",
60  
61          // RCS
62          "**/RCS", "**/RCS/**",
63  
64          // SCCS
65          "**/SCCS", "**/SCCS/**",
66  
67          // Visual SourceSafe
68          "**/vssver.scc",
69  
70          // MKS
71          "**/project.pj",
72  
73          // Subversion
74          "**/.svn", "**/.svn/**",
75  
76          // Arch
77          "**/.arch-ids", "**/.arch-ids/**",
78  
79          // Bazaar
80          "**/.bzr", "**/.bzr/**",
81  
82          // SurroundSCM
83          "**/.MySCMServerInfo",
84  
85          // Mac
86          "**/.DS_Store",
87  
88          // Serena Dimensions Version 10
89          "**/.metadata", "**/.metadata/**",
90  
91          // Mercurial
92          "**/.hg", "**/.hgignore", "**/.hg/**",
93  
94          // git
95          "**/.git", "**/.gitignore", "**/.gitattributes", "**/.git/**",
96  
97          // BitKeeper
98          "**/BitKeeper", "**/BitKeeper/**", "**/ChangeSet", "**/ChangeSet/**",
99  
100         // darcs
101         "**/_darcs", "**/_darcs/**", "**/.darcsrepo", "**/.darcsrepo/**", "**/-darcs-backup*", "**/.darcs-temp-mail" };
102 
103     /**
104      * The patterns for the files to be included.
105      */
106     protected String[] includes;
107 
108     private MatchPatterns includesPatterns;
109 
110     /**
111      * The patterns for the files to be excluded.
112      */
113     protected String[] excludes;
114 
115     private MatchPatterns excludesPatterns;
116 
117     /**
118      * Whether or not the file system should be treated as a case sensitive one.
119      */
120     protected boolean isCaseSensitive = true;
121 
122     /**
123      * Sets whether or not the file system should be regarded as case sensitive.
124      *
125      * @param isCaseSensitive whether or not the file system should be regarded as a case sensitive one
126      */
127     public void setCaseSensitive( boolean isCaseSensitive )
128     {
129         this.isCaseSensitive = isCaseSensitive;
130     }
131 
132     /**
133      * Tests whether or not a given path matches the start of a given pattern up to the first "**".
134      * <p/>
135      * This is not a general purpose test and should only be used if you can live with false positives. For example,
136      * <code>pattern=**\a</code> and <code>str=b</code> will yield <code>true</code>.
137      *
138      * @param pattern The pattern to match against. Must not be <code>null</code>.
139      * @param str The path to match, as a String. Must not be <code>null</code>.
140      * @return whether or not a given path matches the start of a given pattern up to the first "**".
141      */
142     protected static boolean matchPatternStart( String pattern, String str )
143     {
144         return SelectorUtils.matchPatternStart( pattern, str );
145     }
146 
147     /**
148      * Tests whether or not a given path matches the start of a given pattern up to the first "**".
149      * <p/>
150      * This is not a general purpose test and should only be used if you can live with false positives. For example,
151      * <code>pattern=**\a</code> and <code>str=b</code> will yield <code>true</code>.
152      *
153      * @param pattern The pattern to match against. Must not be <code>null</code>.
154      * @param str The path to match, as a String. Must not be <code>null</code>.
155      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
156      * @return whether or not a given path matches the start of a given pattern up to the first "**".
157      */
158     protected static boolean matchPatternStart( String pattern, String str, boolean isCaseSensitive )
159     {
160         return SelectorUtils.matchPatternStart( pattern, str, isCaseSensitive );
161     }
162 
163     /**
164      * Tests whether or not a given path matches a given pattern.
165      *
166      * @param pattern The pattern to match against. Must not be <code>null</code>.
167      * @param str The path to match, as a String. Must not be <code>null</code>.
168      * @return <code>true</code> if the pattern matches against the string, or <code>false</code> otherwise.
169      */
170     protected static boolean matchPath( String pattern, String str )
171     {
172         return SelectorUtils.matchPath( pattern, str );
173     }
174 
175     /**
176      * Tests whether or not a given path matches a given pattern.
177      *
178      * @param pattern The pattern to match against. Must not be <code>null</code>.
179      * @param str The path to match, as a String. Must not be <code>null</code>.
180      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
181      * @return <code>true</code> if the pattern matches against the string, or <code>false</code> otherwise.
182      */
183     protected static boolean matchPath( String pattern, String str, boolean isCaseSensitive )
184     {
185         return SelectorUtils.matchPath( pattern, str, isCaseSensitive );
186     }
187 
188     /**
189      * Tests whether or not a string matches against a pattern. The pattern may contain two special characters:<br>
190      * '*' means zero or more characters<br>
191      * '?' means one and only one character
192      *
193      * @param pattern The pattern to match against. Must not be <code>null</code>.
194      * @param str The string which must be matched against the pattern. Must not be <code>null</code>.
195      * @return <code>true</code> if the string matches against the pattern, or <code>false</code> otherwise.
196      */
197     public static boolean match( String pattern, String str )
198     {
199         return SelectorUtils.match( pattern, str );
200     }
201 
202     /**
203      * Tests whether or not a string matches against a pattern. The pattern may contain two special characters:<br>
204      * '*' means zero or more characters<br>
205      * '?' means one and only one character
206      *
207      * @param pattern The pattern to match against. Must not be <code>null</code>.
208      * @param str The string which must be matched against the pattern. Must not be <code>null</code>.
209      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
210      * @return <code>true</code> if the string matches against the pattern, or <code>false</code> otherwise.
211      */
212     protected static boolean match( String pattern, String str, boolean isCaseSensitive )
213     {
214         return SelectorUtils.match( pattern, str, isCaseSensitive );
215     }
216 
217     /**
218      * Sets the list of include patterns to use. All '/' and '\' characters are replaced by
219      * <code>File.separatorChar</code>, so the separator used need not match <code>File.separatorChar</code>.
220      * <p/>
221      * When a pattern ends with a '/' or '\', "**" is appended.
222      *
223      * @param includes A list of include patterns. May be <code>null</code>, indicating that all files should be
224      *            included. If a non-<code>null</code> list is given, all elements must be non-<code>null</code>.
225      */
226     public void setIncludes( String[] includes )
227     {
228         if ( includes == null )
229         {
230             this.includes = null;
231         }
232         else
233         {
234             final List<String> list = new ArrayList<String>( includes.length );
235             for ( String include : includes )
236             {
237                 if ( include != null )
238                 {
239                     list.add( normalizePattern( include ) );
240                 }
241             }
242             this.includes = list.toArray( new String[list.size()] );
243         }
244     }
245 
246     /**
247      * Sets the list of exclude patterns to use. All '/' and '\' characters are replaced by
248      * <code>File.separatorChar</code>, so the separator used need not match <code>File.separatorChar</code>.
249      * <p/>
250      * When a pattern ends with a '/' or '\', "**" is appended.
251      *
252      * @param excludes A list of exclude patterns. May be <code>null</code>, indicating that no files should be
253      *            excluded. If a non-<code>null</code> list is given, all elements must be non-<code>null</code>.
254      */
255     public void setExcludes( String[] excludes )
256     {
257         if ( excludes == null )
258         {
259             this.excludes = null;
260         }
261         else
262         {
263             final List<String> list = new ArrayList<String>( excludes.length );
264             for ( String exclude : excludes )
265             {
266                 if ( exclude != null )
267                 {
268                     list.add( normalizePattern( exclude ) );
269                 }
270             }
271             this.excludes = list.toArray( new String[list.size()] );
272         }
273     }
274 
275     /**
276      * Normalizes the pattern, e.g. converts forward and backward slashes to the platform-specific file separator.
277      *
278      * @param pattern The pattern to normalize, must not be <code>null</code>.
279      * @return The normalized pattern, never <code>null</code>.
280      */
281     private String normalizePattern( String pattern )
282     {
283         pattern = pattern.trim();
284 
285         if ( pattern.startsWith( SelectorUtils.REGEX_HANDLER_PREFIX ) )
286         {
287             if ( File.separatorChar == '\\' )
288             {
289                 pattern = StringUtils.replace( pattern, "/", "\\\\" );
290             }
291             else
292             {
293                 pattern = StringUtils.replace( pattern, "\\\\", "/" );
294             }
295         }
296         else
297         {
298             pattern = pattern.replace( File.separatorChar == '/' ? '\\' : '/', File.separatorChar );
299 
300             if ( pattern.endsWith( File.separator ) )
301             {
302                 pattern += "**";
303             }
304         }
305 
306         return pattern;
307     }
308 
309     /**
310      * Tests whether or not a name matches against at least one include pattern.
311      *
312      * @param name The name to match. Must not be <code>null</code>.
313      * @return <code>true</code> when the name matches against at least one include pattern, or <code>false</code>
314      *         otherwise.
315      */
316     protected boolean isIncluded( String name )
317     {
318         return includesPatterns.matches( name, isCaseSensitive );
319     }
320 
321     protected boolean isIncluded( String name, String[] tokenizedName )
322     {
323         return includesPatterns.matches( name, tokenizedName, isCaseSensitive );
324     }
325 
326     /**
327      * Tests whether or not a name matches the start of at least one include pattern.
328      *
329      * @param name The name to match. Must not be <code>null</code>.
330      * @return <code>true</code> when the name matches against the start of at least one include pattern, or
331      *         <code>false</code> otherwise.
332      */
333     protected boolean couldHoldIncluded( String name )
334     {
335         return includesPatterns.matchesPatternStart( name, isCaseSensitive );
336     }
337 
338     /**
339      * Tests whether or not a name matches against at least one exclude pattern.
340      *
341      * @param name The name to match. Must not be <code>null</code>.
342      * @return <code>true</code> when the name matches against at least one exclude pattern, or <code>false</code>
343      *         otherwise.
344      */
345     protected boolean isExcluded( String name )
346     {
347         return excludesPatterns.matches( name, isCaseSensitive );
348     }
349 
350     protected boolean isExcluded( String name, String[] tokenizedName )
351     {
352         return excludesPatterns.matches( name, tokenizedName, isCaseSensitive );
353     }
354 
355     /**
356      * Adds default exclusions to the current exclusions set.
357      */
358     public void addDefaultExcludes()
359     {
360         int excludesLength = excludes == null ? 0 : excludes.length;
361         String[] newExcludes;
362         newExcludes = new String[excludesLength + DEFAULTEXCLUDES.length];
363         if ( excludesLength > 0 )
364         {
365             System.arraycopy( excludes, 0, newExcludes, 0, excludesLength );
366         }
367         for ( int i = 0; i < DEFAULTEXCLUDES.length; i++ )
368         {
369             newExcludes[i + excludesLength] = DEFAULTEXCLUDES[i].replace( '/', File.separatorChar );
370         }
371         excludes = newExcludes;
372     }
373 
374     protected void setupDefaultFilters()
375     {
376         if ( includes == null )
377         {
378             // No includes supplied, so set it to 'matches all'
379             includes = new String[1];
380             includes[0] = "**";
381         }
382         if ( excludes == null )
383         {
384             excludes = new String[0];
385         }
386     }
387 
388     protected void setupMatchPatterns()
389     {
390         includesPatterns = MatchPatterns.from( includes );
391         excludesPatterns = MatchPatterns.from( excludes );
392     }
393 }