View Javadoc
1   package org.codehaus.plexus.util;
2   
3   /*
4    * The Apache Software License, Version 1.1
5    *
6    * Copyright (c) 2000-2003 The Apache Software Foundation.  All rights
7    * reserved.
8    *
9    * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   *
13   * 1. Redistributions of source code must retain the above copyright
14   *    notice, this list of conditions and the following disclaimer.
15   *
16   * 2. Redistributions in binary form must reproduce the above copyright
17   *    notice, this list of conditions and the following disclaimer in
18   *    the documentation and/or other materials provided with the
19   *    distribution.
20   *
21   * 3. The end-user documentation included with the redistribution, if
22   *    any, must include the following acknowledgement:
23   *       "This product includes software developed by the
24   *        Apache Software Foundation (http://www.codehaus.org/)."
25   *    Alternately, this acknowledgement may appear in the software itself,
26   *    if and wherever such third-party acknowledgements normally appear.
27   *
28   * 4. The names "Ant" and "Apache Software
29   *    Foundation" must not be used to endorse or promote products derived
30   *    from this software without prior written permission. For written
31   *    permission, please contact codehaus@codehaus.org.
32   *
33   * 5. Products derived from this software may not be called "Apache"
34   *    nor may "Apache" appear in their names without prior written
35   *    permission of the Apache Group.
36   *
37   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48   * SUCH DAMAGE.
49   * ====================================================================
50   *
51   * This software consists of voluntary contributions made by many
52   * individuals on behalf of the Apache Software Foundation.  For more
53   * information on the Apache Software Foundation, please see
54   * <http://www.codehaus.org/>.
55   */
56  
57  import java.io.File;
58  import java.io.IOException;
59  import java.util.ArrayList;
60  import java.util.Arrays;
61  
62  /**
63   * <p>Class for scanning a directory for files/directories which match certain criteria.</p>
64   *
65   * <p>These criteria consist of selectors and patterns which have been specified. With the selectors you can select which
66   * files you want to have included. Files which are not selected are excluded. With patterns you can include or exclude
67   * files based on their filename.</p>
68   *
69   * <p>The idea is simple. A given directory is recursively scanned for all files and directories. Each file/directory is
70   * matched against a set of selectors, including special support for matching against filenames with include and
71   * exclude patterns. Only files/directories which match at least one pattern of the include pattern list or other file
72   * selector, and don't match any pattern of the exclude pattern list or fail to match against a required selector will
73   * be placed in the list of files/directories found.</p>
74   *
75   * <p>When no list of include patterns is supplied, "**" will be used, which means that everything will be matched. When no
76   * list of exclude patterns is supplied, an empty list is used, such that nothing will be excluded. When no selectors
77   * are supplied, none are applied.</p>
78   *
79   * <p>The filename pattern matching is done as follows: The name to be matched is split up in path segments. A path segment
80   * is the name of a directory or file, which is bounded by <code>File.separator</code> ('/' under UNIX, '\' under
81   * Windows). For example, "abc/def/ghi/xyz.java" is split up in the segments "abc", "def","ghi" and "xyz.java". The same
82   * is done for the pattern against which should be matched.</p>
83   *
84   * <p>The segments of the name and the pattern are then matched against each other. When '**' is used for a path segment in
85   * the pattern, it matches zero or more path segments of the name.</p>
86   *
87   * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of the pattern and the
88   * string to match:<br>
89   * When a pattern starts with a <code>File.separator</code>, the string to match must also start with a
90   * <code>File.separator</code>. When a pattern does not start with a <code>File.separator</code>, the string to match
91   * may not start with a <code>File.separator</code>. When one of these rules is not obeyed, the string will not match.</p>
92   *
93   * <p>When a name path segment is matched against a pattern path segment, the following special characters can be used:<br>
94   * '*' matches zero or more characters<br>
95   * '?' matches one character.</p>
96   *
97   * Examples:
98   * <ul>
99   *   <li>"**\*.class" matches all .class files/dirs in a directory tree.</li>
100  *   <li>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and then ".java", in a
101  * directory called test.</li>
102  *   <li>"**" matches everything in a directory tree.</li>
103  *   <li>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent directory called test
104  * (e.g. "abc\test\def\ghi\XYZ123").</li>
105  * </ul>
106  *
107  * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
108  * Example of usage:
109  * <pre>
110  * String[] includes = { "**\\*.class" };
111  * String[] excludes = { "modules\\*\\**" };
112  * ds.setIncludes( includes );
113  * ds.setExcludes( excludes );
114  * ds.setBasedir( new File( "test" ) );
115  * ds.setCaseSensitive( true );
116  * ds.scan();
117  *
118  * System.out.println( "FILES:" );
119  * String[] files = ds.getIncludedFiles();
120  * for ( int i = 0; i &lt; files.length; i++ )
121  * {
122  *     System.out.println( files[i] );
123  * }
124  * </pre>
125  *
126  * <p>This will scan a directory called test for .class files, but excludes all files in all proper subdirectories of a
127  * directory called "modules"</p>
128  *
129  * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
130  * @author Magesh Umasankar
131  * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
132  * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
133  */
134 public class DirectoryScanner extends AbstractScanner {
135 
136     private static final String[] EMPTY_STRING_ARRAY = new String[0];
137 
138     /**
139      * The base directory to be scanned.
140      */
141     protected File basedir;
142 
143     /**
144      * The files which matched at least one include and no excludes and were selected.
145      */
146     protected ArrayList<String> filesIncluded;
147 
148     /**
149      * The files which did not match any includes or selectors.
150      */
151     protected ArrayList<String> filesNotIncluded;
152 
153     /**
154      * The files which matched at least one include and at least one exclude.
155      */
156     protected ArrayList<String> filesExcluded;
157 
158     /**
159      * The directories which matched at least one include and no excludes and were selected.
160      */
161     protected ArrayList<String> dirsIncluded;
162 
163     /**
164      * The directories which were found and did not match any includes.
165      */
166     protected ArrayList<String> dirsNotIncluded;
167 
168     /**
169      * The directories which matched at least one include and at least one exclude.
170      */
171     protected ArrayList<String> dirsExcluded;
172 
173     /**
174      * The files which matched at least one include and no excludes and which a selector discarded.
175      */
176     protected ArrayList<String> filesDeselected;
177 
178     /**
179      * The directories which matched at least one include and no excludes but which a selector discarded.
180      */
181     protected ArrayList<String> dirsDeselected;
182 
183     /**
184      * Whether or not our results were built by a slow scan.
185      */
186     protected boolean haveSlowResults = false;
187 
188     /**
189      * Whether or not symbolic links should be followed.
190      *
191      * @since Ant 1.5
192      */
193     private boolean followSymlinks = true;
194 
195     /**
196      * Whether or not everything tested so far has been included.
197      */
198     protected boolean everythingIncluded = true;
199 
200     private final char[][] tokenizedEmpty = MatchPattern.tokenizePathToCharArray("", File.separator);
201 
202     /**
203      * Sole constructor.
204      */
205     public DirectoryScanner() {}
206 
207     /**
208      * Sets the base directory to be scanned. This is the directory which is scanned recursively. All '/' and '\'
209      * characters are replaced by <code>File.separatorChar</code>, so the separator used need not match
210      * <code>File.separatorChar</code>.
211      *
212      * @param basedir The base directory to scan. Must not be <code>null</code>.
213      */
214     public void setBasedir(String basedir) {
215         setBasedir(new File(basedir.replace('/', File.separatorChar).replace('\\', File.separatorChar)));
216     }
217 
218     /**
219      * Sets the base directory to be scanned. This is the directory which is scanned recursively.
220      *
221      * @param basedir The base directory for scanning. Should not be <code>null</code>.
222      */
223     public void setBasedir(File basedir) {
224         this.basedir = basedir;
225     }
226 
227     /**
228      * Returns the base directory to be scanned. This is the directory which is scanned recursively.
229      *
230      * @return the base directory to be scanned
231      */
232     @Override
233     public File getBasedir() {
234         return basedir;
235     }
236 
237     /**
238      * Sets whether or not symbolic links should be followed.
239      *
240      * @param followSymlinks whether or not symbolic links should be followed
241      */
242     public void setFollowSymlinks(boolean followSymlinks) {
243         this.followSymlinks = followSymlinks;
244     }
245 
246     /**
247      * Returns whether or not the scanner has included all the files or directories it has come across so far.
248      *
249      * @return <code>true</code> if all files and directories which have been found so far have been included.
250      */
251     public boolean isEverythingIncluded() {
252         return everythingIncluded;
253     }
254 
255     /**
256      * Scans the base directory for files which match at least one include pattern and don't match any exclude patterns.
257      * If there are selectors then the files must pass muster there, as well.
258      *
259      * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is <code>null</code>, doesn't
260      *             exist, or isn't a directory).
261      */
262     @Override
263     public void scan() throws IllegalStateException {
264         if (basedir == null) {
265             throw new IllegalStateException("No basedir set");
266         }
267         if (!basedir.exists()) {
268             throw new IllegalStateException("basedir " + basedir + " does not exist");
269         }
270         if (!basedir.isDirectory()) {
271             throw new IllegalStateException("basedir " + basedir + " is not a directory");
272         }
273 
274         setupDefaultFilters();
275         setupMatchPatterns();
276 
277         filesIncluded = new ArrayList<String>();
278         filesNotIncluded = new ArrayList<String>();
279         filesExcluded = new ArrayList<String>();
280         filesDeselected = new ArrayList<String>();
281         dirsIncluded = new ArrayList<String>();
282         dirsNotIncluded = new ArrayList<String>();
283         dirsExcluded = new ArrayList<String>();
284         dirsDeselected = new ArrayList<String>();
285 
286         if (isIncluded("", tokenizedEmpty)) {
287 
288             if (!isExcluded("", tokenizedEmpty)) {
289                 if (isSelected("", basedir)) {
290                     dirsIncluded.add("");
291                 } else {
292                     dirsDeselected.add("");
293                 }
294             } else {
295                 dirsExcluded.add("");
296             }
297         } else {
298             dirsNotIncluded.add("");
299         }
300         scandir(basedir, "", true);
301     }
302 
303     /**
304      * <p>Top level invocation for a slow scan. A slow scan builds up a full list of excluded/included files/directories,
305      * whereas a fast scan will only have full results for included files, as it ignores directories which can't
306      * possibly hold any included files/directories.</p>
307      *
308      * <p>Returns immediately if a slow scan has already been completed.</p>
309      */
310     protected void slowScan() {
311         if (haveSlowResults) {
312             return;
313         }
314 
315         String[] excl = dirsExcluded.toArray(EMPTY_STRING_ARRAY);
316         String[] notIncl = dirsNotIncluded.toArray(EMPTY_STRING_ARRAY);
317 
318         for (String anExcl : excl) {
319             if (!couldHoldIncluded(anExcl)) {
320                 scandir(new File(basedir, anExcl), anExcl + File.separator, false);
321             }
322         }
323 
324         for (String aNotIncl : notIncl) {
325             if (!couldHoldIncluded(aNotIncl)) {
326                 scandir(new File(basedir, aNotIncl), aNotIncl + File.separator, false);
327             }
328         }
329 
330         haveSlowResults = true;
331     }
332 
333     /**
334      * Scans the given directory for files and directories. Found files and directories are placed in their respective
335      * collections, based on the matching of includes, excludes, and the selectors. When a directory is found, it is
336      * scanned recursively.
337      *
338      * @param dir The directory to scan. Must not be <code>null</code>.
339      * @param vpath The path relative to the base directory (needed to prevent problems with an absolute path when using
340      *            dir). Must not be <code>null</code>.
341      * @param fast Whether or not this call is part of a fast scan.
342      * @see #filesIncluded
343      * @see #filesNotIncluded
344      * @see #filesExcluded
345      * @see #dirsIncluded
346      * @see #dirsNotIncluded
347      * @see #dirsExcluded
348      * @see #slowScan
349      */
350     protected void scandir(File dir, String vpath, boolean fast) {
351         String[] newfiles = dir.list();
352 
353         if (newfiles == null) {
354             /*
355              * two reasons are mentioned in the API docs for File.list (1) dir is not a directory. This is impossible as
356              * we wouldn't get here in this case. (2) an IO error occurred (why doesn't it throw an exception then???)
357              */
358 
359             /*
360              * [jdcasey] (2) is apparently happening to me, as this is killing one of my tests... this is affecting the
361              * assembly plugin, fwiw. I will initialize the newfiles array as zero-length for now. NOTE: I can't find
362              * the problematic code, as it appears to come from a native method in UnixFileSystem...
363              */
364             /*
365              * [bentmann] A null array will also be returned from list() on NTFS when dir refers to a soft link or
366              * junction point whose target is not existent.
367              */
368             newfiles = EMPTY_STRING_ARRAY;
369 
370             // throw new IOException( "IO error scanning directory " + dir.getAbsolutePath() );
371         }
372 
373         if (!followSymlinks) {
374             try {
375                 if (isParentSymbolicLink(dir, null)) {
376                     for (String newfile : newfiles) {
377                         String name = vpath + newfile;
378                         File file = new File(dir, newfile);
379                         if (file.isDirectory()) {
380                             dirsExcluded.add(name);
381                         } else {
382                             filesExcluded.add(name);
383                         }
384                     }
385                     return;
386                 }
387             } catch (IOException ioe) {
388                 String msg = "IOException caught while checking for links!";
389                 // will be caught and redirected to Ant's logging system
390                 System.err.println(msg);
391             }
392         }
393 
394         if (filenameComparator != null) {
395             Arrays.sort(newfiles, filenameComparator);
396         }
397 
398         for (String newfile : newfiles) {
399             String name = vpath + newfile;
400             char[][] tokenizedName = MatchPattern.tokenizePathToCharArray(name, File.separator);
401             File file = new File(dir, newfile);
402             if (file.isDirectory()) {
403 
404                 if (isIncluded(name, tokenizedName)) {
405                     if (!isExcluded(name, tokenizedName)) {
406                         if (isSelected(name, file)) {
407                             dirsIncluded.add(name);
408                             if (fast) {
409                                 scandir(file, name + File.separator, fast);
410                             }
411                         } else {
412                             everythingIncluded = false;
413                             dirsDeselected.add(name);
414                             if (fast && couldHoldIncluded(name)) {
415                                 scandir(file, name + File.separator, fast);
416                             }
417                         }
418 
419                     } else {
420                         everythingIncluded = false;
421                         dirsExcluded.add(name);
422                         if (fast && couldHoldIncluded(name)) {
423                             scandir(file, name + File.separator, fast);
424                         }
425                     }
426                 } else {
427                     everythingIncluded = false;
428                     dirsNotIncluded.add(name);
429                     if (fast && couldHoldIncluded(name)) {
430                         scandir(file, name + File.separator, fast);
431                     }
432                 }
433                 if (!fast) {
434                     scandir(file, name + File.separator, fast);
435                 }
436             } else if (file.isFile()) {
437                 if (isIncluded(name, tokenizedName)) {
438                     if (!isExcluded(name, tokenizedName)) {
439                         if (isSelected(name, file)) {
440                             filesIncluded.add(name);
441                         } else {
442                             everythingIncluded = false;
443                             filesDeselected.add(name);
444                         }
445                     } else {
446                         everythingIncluded = false;
447                         filesExcluded.add(name);
448                     }
449                 } else {
450                     everythingIncluded = false;
451                     filesNotIncluded.add(name);
452                 }
453             }
454         }
455     }
456 
457     /**
458      * Tests whether a name should be selected.
459      *
460      * @param name the filename to check for selecting
461      * @param file the java.io.File object for this filename
462      * @return <code>false</code> when the selectors says that the file should not be selected, <code>true</code>
463      *         otherwise.
464      */
465     protected boolean isSelected(String name, File file) {
466         return true;
467     }
468 
469     /**
470      * Returns the names of the files which matched at least one of the include patterns and none of the exclude
471      * patterns. The names are relative to the base directory.
472      *
473      * @return the names of the files which matched at least one of the include patterns and none of the exclude
474      *         patterns.
475      */
476     @Override
477     public String[] getIncludedFiles() {
478         return filesIncluded.toArray(EMPTY_STRING_ARRAY);
479     }
480 
481     /**
482      * Returns the names of the files which matched none of the include patterns. The names are relative to the base
483      * directory. This involves performing a slow scan if one has not already been completed.
484      *
485      * @return the names of the files which matched none of the include patterns.
486      * @see #slowScan
487      */
488     public String[] getNotIncludedFiles() {
489         slowScan();
490         return filesNotIncluded.toArray(EMPTY_STRING_ARRAY);
491     }
492 
493     /**
494      * Returns the names of the files which matched at least one of the include patterns and at least one of the exclude
495      * patterns. The names are relative to the base directory. This involves performing a slow scan if one has not
496      * already been completed.
497      *
498      * @return the names of the files which matched at least one of the include patterns and at at least one of the
499      *         exclude patterns.
500      * @see #slowScan
501      */
502     public String[] getExcludedFiles() {
503         slowScan();
504         return filesExcluded.toArray(EMPTY_STRING_ARRAY);
505     }
506 
507     /**
508      * <p>Returns the names of the files which were selected out and therefore not ultimately included.</p>
509      *
510      * <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
511      * completed.</p>
512      *
513      * @return the names of the files which were deselected.
514      * @see #slowScan
515      */
516     public String[] getDeselectedFiles() {
517         slowScan();
518         return filesDeselected.toArray(EMPTY_STRING_ARRAY);
519     }
520 
521     /**
522      * Returns the names of the directories which matched at least one of the include patterns and none of the exclude
523      * patterns. The names are relative to the base directory.
524      *
525      * @return the names of the directories which matched at least one of the include patterns and none of the exclude
526      *         patterns.
527      */
528     @Override
529     public String[] getIncludedDirectories() {
530         return dirsIncluded.toArray(EMPTY_STRING_ARRAY);
531     }
532 
533     /**
534      * Returns the names of the directories which matched none of the include patterns. The names are relative to the
535      * base directory. This involves performing a slow scan if one has not already been completed.
536      *
537      * @return the names of the directories which matched none of the include patterns.
538      * @see #slowScan
539      */
540     public String[] getNotIncludedDirectories() {
541         slowScan();
542         return dirsNotIncluded.toArray(EMPTY_STRING_ARRAY);
543     }
544 
545     /**
546      * Returns the names of the directories which matched at least one of the include patterns and at least one of the
547      * exclude patterns. The names are relative to the base directory. This involves performing a slow scan if one has
548      * not already been completed.
549      *
550      * @return the names of the directories which matched at least one of the include patterns and at least one of the
551      *         exclude patterns.
552      * @see #slowScan
553      */
554     public String[] getExcludedDirectories() {
555         slowScan();
556         return dirsExcluded.toArray(EMPTY_STRING_ARRAY);
557     }
558 
559     /**
560      * <p>Returns the names of the directories which were selected out and therefore not ultimately included.</p>
561      *
562      * <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
563      * completed.</p>
564      *
565      * @return the names of the directories which were deselected.
566      * @see #slowScan
567      */
568     public String[] getDeselectedDirectories() {
569         slowScan();
570         return dirsDeselected.toArray(EMPTY_STRING_ARRAY);
571     }
572 
573     /**
574      * <p>Checks whether a given file is a symbolic link.</p>
575      *
576      * <p>It doesn't really test for symbolic links but whether the canonical and absolute paths of the file are identical
577      * - this may lead to false positives on some platforms.
578      * </p>
579      *
580      * @param parent the parent directory of the file to test
581      * @param name the name of the file to test.
582      * @return true if it's a symbolic link
583      * @throws java.io.IOException .
584      * @since Ant 1.5
585      */
586     public boolean isSymbolicLink(File parent, String name) throws IOException {
587         return NioFiles.isSymbolicLink(new File(parent, name));
588     }
589 
590     /**
591      * <p>Checks whether the parent of this file is a symbolic link.</p>
592      *
593      * <p>For java versions prior to 7 It doesn't really test for symbolic links but whether the canonical and absolute
594      * paths of the file are identical - this may lead to false positives on some platforms.</p>
595      *
596      * @param parent the parent directory of the file to test
597      * @param name the name of the file to test.
598      * @return true if it's a symbolic link
599      * @throws java.io.IOException .
600      * @since Ant 1.5
601      */
602     public boolean isParentSymbolicLink(File parent, String name) throws IOException {
603         return NioFiles.isSymbolicLink(parent);
604     }
605 }