View Javadoc
1   package org.codehaus.plexus.util;
2   
3   /*
4    * The Apache Software License, Version 1.1
5    *
6    * Copyright (c) 2000-2003 The Apache Software Foundation.  All rights
7    * reserved.
8    *
9    * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   *
13   * 1. Redistributions of source code must retain the above copyright
14   *    notice, this list of conditions and the following disclaimer.
15   *
16   * 2. Redistributions in binary form must reproduce the above copyright
17   *    notice, this list of conditions and the following disclaimer in
18   *    the documentation and/or other materials provided with the
19   *    distribution.
20   *
21   * 3. The end-user documentation included with the redistribution, if
22   *    any, must include the following acknowledgement:
23   *       "This product includes software developed by the
24   *        Apache Software Foundation (http://www.codehaus.org/)."
25   *    Alternately, this acknowledgement may appear in the software itself,
26   *    if and wherever such third-party acknowledgements normally appear.
27   *
28   * 4. The names "Ant" and "Apache Software
29   *    Foundation" must not be used to endorse or promote products derived
30   *    from this software without prior written permission. For written
31   *    permission, please contact codehaus@codehaus.org.
32   *
33   * 5. Products derived from this software may not be called "Apache"
34   *    nor may "Apache" appear in their names without prior written
35   *    permission of the Apache Group.
36   *
37   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48   * SUCH DAMAGE.
49   * ====================================================================
50   *
51   * This software consists of voluntary contributions made by many
52   * individuals on behalf of the Apache Software Foundation.  For more
53   * information on the Apache Software Foundation, please see
54   * <http://www.codehaus.org/>.
55   */
56  
57  import java.io.File;
58  import java.io.IOException;
59  import java.util.ArrayList;
60  import java.util.Vector;
61  
62  /**
63   * Class for scanning a directory for files/directories which match certain criteria.
64   * <p/>
65   * These criteria consist of selectors and patterns which have been specified. With the selectors you can select which
66   * files you want to have included. Files which are not selected are excluded. With patterns you can include or exclude
67   * files based on their filename.
68   * <p/>
69   * The idea is simple. A given directory is recursively scanned for all files and directories. Each file/directory is
70   * matched against a set of selectors, including special support for matching against filenames with include and
71   * exclude patterns. Only files/directories which match at least one pattern of the include pattern list or other file
72   * selector, and don't match any pattern of the exclude pattern list or fail to match against a required selector will
73   * be placed in the list of files/directories found.
74   * <p/>
75   * When no list of include patterns is supplied, "**" will be used, which means that everything will be matched. When no
76   * list of exclude patterns is supplied, an empty list is used, such that nothing will be excluded. When no selectors
77   * are supplied, none are applied.
78   * <p/>
79   * The filename pattern matching is done as follows: The name to be matched is split up in path segments. A path segment
80   * is the name of a directory or file, which is bounded by <code>File.separator</code> ('/' under UNIX, '\' under
81   * Windows). For example, "abc/def/ghi/xyz.java" is split up in the segments "abc", "def","ghi" and "xyz.java". The same
82   * is done for the pattern against which should be matched.
83   * <p/>
84   * The segments of the name and the pattern are then matched against each other. When '**' is used for a path segment in
85   * the pattern, it matches zero or more path segments of the name.
86   * <p/>
87   * There is a special case regarding the use of <code>File.separator</code>s at the beginning of the pattern and the
88   * string to match:<br>
89   * When a pattern starts with a <code>File.separator</code>, the string to match must also start with a
90   * <code>File.separator</code>. When a pattern does not start with a <code>File.separator</code>, the string to match
91   * may not start with a <code>File.separator</code>. When one of these rules is not obeyed, the string will not match.
92   * <p/>
93   * When a name path segment is matched against a pattern path segment, the following special characters can be used:<br>
94   * '*' matches zero or more characters<br>
95   * '?' matches one character.
96   * <p/>
97   * Examples:
98   * <p/>
99   * "**\*.class" matches all .class files/dirs in a directory tree.
100  * <p/>
101  * "test\a??.java" matches all files/dirs which start with an 'a', then two more characters and then ".java", in a
102  * directory called test.
103  * <p/>
104  * "**" matches everything in a directory tree.
105  * <p/>
106  * "**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent directory called test
107  * (e.g. "abc\test\def\ghi\XYZ123").
108  * <p/>
109  * Case sensitivity may be turned off if necessary. By default, it is turned on.
110  * <p/>
111  * Example of usage:
112  * 
113  * <pre>
114  * String[] includes = { "**\\*.class" };
115  * String[] excludes = { "modules\\*\\**" };
116  * ds.setIncludes( includes );
117  * ds.setExcludes( excludes );
118  * ds.setBasedir( new File( "test" ) );
119  * ds.setCaseSensitive( true );
120  * ds.scan();
121  *
122  * System.out.println( "FILES:" );
123  * String[] files = ds.getIncludedFiles();
124  * for ( int i = 0; i < files.length; i++ )
125  * {
126  *     System.out.println( files[i] );
127  * }
128  * </pre>
129  * 
130  * This will scan a directory called test for .class files, but excludes all files in all proper subdirectories of a
131  * directory called "modules"
132  *
133  * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
134  * @author Magesh Umasankar
135  * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
136  * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
137  */
138 public class DirectoryScanner
139     extends AbstractScanner
140 {
141 
142     /**
143      * The base directory to be scanned. Assigned through <code>setBasedir</code>; <code>scan()</code> rejects a null, missing or non-directory value.
144      */
145     protected File basedir;
146 
147     /**
148      * The files which matched at least one include and no excludes and were selected. Created empty by each <code>scan()</code>; entries are paths relative to the base directory.
149      */
150     protected Vector<String> filesIncluded;
151 
152     /**
153      * The files which did not match any include pattern. Created empty by each <code>scan()</code>.
154      */
155     protected Vector<String> filesNotIncluded;
156 
157     /**
158      * The files which matched at least one include and at least one exclude. Also receives non-directory entries rejected by the link check when links are not followed.
159      */
160     protected Vector<String> filesExcluded;
161 
162     /**
163      * The directories which matched at least one include and no excludes and were selected. The base directory itself is recorded as the empty string.
164      */
165     protected Vector<String> dirsIncluded;
166 
167     /**
168      * The directories which were found and did not match any includes. Read by <code>slowScan()</code> to find directories still to be visited.
169      */
170     protected Vector<String> dirsNotIncluded;
171 
172     /**
173      * The directories which matched at least one include and at least one exclude. Also receives directory entries rejected by the link check; read by <code>slowScan()</code>.
174      */
175     protected Vector<String> dirsExcluded;
176 
177     /**
178      * The files which matched at least one include and no excludes but which <code>isSelected</code> rejected.
179      */
180     protected Vector<String> filesDeselected;
181 
182     /**
183      * The directories which matched at least one include and no excludes but which <code>isSelected</code> rejected.
184      */
185     protected Vector<String> dirsDeselected;
186 
187     /**
188      * Whether or not our results were built by a slow scan. Set once <code>slowScan()</code> completes; while set, further calls to it return immediately.
189      */
190     protected boolean haveSlowResults = false;
191 
192     /**
193      * Whether or not symbolic links should be followed. When <code>false</code>, <code>scandir</code> files entries flagged by <code>isParentSymbolicLink</code> as excluded.
194      *
195      * @since Ant 1.5
196      */
197     private boolean followSymlinks = true;
198 
199     /**
200      * Whether or not everything tested so far has been included. Cleared by <code>scandir</code> when an entry ends up not included, excluded or deselected.
201      */
202     protected boolean everythingIncluded = true;
203     /** Tokenized form of the empty path; <code>scan()</code> uses it to test the base directory itself against the patterns. */
204     private final String[] tokenizedEmpty = MatchPattern.tokenizePathToString( "", File.separator );
205 
/**
 * Creates a scanner that has no base directory yet. A base directory has to be supplied through one of the
 * <code>setBasedir</code> methods before <code>scan()</code> is called.
 */
public DirectoryScanner()
{
    super();
}
212 
213     /**
214      * Sets the base directory to be scanned. This is the directory which is scanned recursively. All '/' and '\'
215      * characters are replaced by <code>File.separatorChar</code>, so the separator used need not match
216      * <code>File.separatorChar</code>.
217      *
218      * @param basedir The base directory to scan. Must not be <code>null</code>.
219      */
220     public void setBasedir( String basedir )
221     {
222         setBasedir( new File( basedir.replace( '/', File.separatorChar ).replace( '\\', File.separatorChar ) ) );
223     }
224 
225     /**
226      * Sets the base directory to be scanned. This is the directory which is scanned recursively.
227      *
228      * @param basedir The base directory for scanning. Should not be <code>null</code>.
229      */
230     public void setBasedir( File basedir )
231     {
232         this.basedir = basedir;
233     }
234 
235     /**
236      * Returns the base directory to be scanned. This is the directory which is scanned recursively.
237      *
238      * @return the base directory to be scanned
239      */
240     public File getBasedir()
241     {
242         return basedir;
243     }
244 
245     /**
246      * Sets whether or not symbolic links should be followed.
247      *
248      * @param followSymlinks whether or not symbolic links should be followed
249      */
250     public void setFollowSymlinks( boolean followSymlinks )
251     {
252         this.followSymlinks = followSymlinks;
253     }
254 
255     /**
256      * Returns whether or not the scanner has included all the files or directories it has come across so far.
257      *
258      * @return <code>true</code> if all files and directories which have been found so far have been included.
259      */
260     public boolean isEverythingIncluded()
261     {
262         return everythingIncluded;
263     }
264 
265     /**
266      * Scans the base directory for files which match at least one include pattern and don't match any exclude patterns.
267      * If there are selectors then the files must pass muster there, as well.
268      *
269      * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is <code>null</code>, doesn't
270      *             exist, or isn't a directory).
271      */
272     public void scan()
273         throws IllegalStateException
274     {
275         if ( basedir == null )
276         {
277             throw new IllegalStateException( "No basedir set" );
278         }
279         if ( !basedir.exists() )
280         {
281             throw new IllegalStateException( "basedir " + basedir + " does not exist" );
282         }
283         if ( !basedir.isDirectory() )
284         {
285             throw new IllegalStateException( "basedir " + basedir + " is not a directory" );
286         }
287 
288         setupDefaultFilters();
289         setupMatchPatterns();
290 
291         filesIncluded = new Vector<String>();
292         filesNotIncluded = new Vector<String>();
293         filesExcluded = new Vector<String>();
294         filesDeselected = new Vector<String>();
295         dirsIncluded = new Vector<String>();
296         dirsNotIncluded = new Vector<String>();
297         dirsExcluded = new Vector<String>();
298         dirsDeselected = new Vector<String>();
299 
300         if ( isIncluded( "", tokenizedEmpty ) )
301         {
302 
303             if ( !isExcluded( "", tokenizedEmpty ) )
304             {
305                 if ( isSelected( "", basedir ) )
306                 {
307                     dirsIncluded.addElement( "" );
308                 }
309                 else
310                 {
311                     dirsDeselected.addElement( "" );
312                 }
313             }
314             else
315             {
316                 dirsExcluded.addElement( "" );
317             }
318         }
319         else
320         {
321             dirsNotIncluded.addElement( "" );
322         }
323         scandir( basedir, "", true );
324     }
325 
326     /**
327      * Top level invocation for a slow scan. A slow scan builds up a full list of excluded/included files/directories,
328      * whereas a fast scan will only have full results for included files, as it ignores directories which can't
329      * possibly hold any included files/directories.
330      * <p/>
331      * Returns immediately if a slow scan has already been completed.
332      */
333     protected void slowScan()
334     {
335         if ( haveSlowResults )
336         {
337             return;
338         }
339 
340         String[] excl = new String[dirsExcluded.size()];
341         dirsExcluded.copyInto( excl );
342 
343         String[] notIncl = new String[dirsNotIncluded.size()];
344         dirsNotIncluded.copyInto( notIncl );
345 
346         for ( String anExcl : excl )
347         {
348             if ( !couldHoldIncluded( anExcl ) )
349             {
350                 scandir( new File( basedir, anExcl ), anExcl + File.separator, false );
351             }
352         }
353 
354         for ( String aNotIncl : notIncl )
355         {
356             if ( !couldHoldIncluded( aNotIncl ) )
357             {
358                 scandir( new File( basedir, aNotIncl ), aNotIncl + File.separator, false );
359             }
360         }
361 
362         haveSlowResults = true;
363     }
364 
365     /**
366      * Scans the given directory for files and directories. Found files and directories are placed in their respective
367      * collections, based on the matching of includes, excludes, and the selectors. When a directory is found, it is
368      * scanned recursively.
369      *
370      * @param dir The directory to scan. Must not be <code>null</code>.
371      * @param vpath The path relative to the base directory (needed to prevent problems with an absolute path when using
372      *            dir). Must not be <code>null</code>.
373      * @param fast Whether or not this call is part of a fast scan.
374      * @see #filesIncluded
375      * @see #filesNotIncluded
376      * @see #filesExcluded
377      * @see #dirsIncluded
378      * @see #dirsNotIncluded
379      * @see #dirsExcluded
380      * @see #slowScan
381      */
382     protected void scandir( File dir, String vpath, boolean fast )
383     {
384         String[] newfiles = dir.list();
385 
386         if ( newfiles == null )
387         {
388             /*
389              * two reasons are mentioned in the API docs for File.list (1) dir is not a directory. This is impossible as
390              * we wouldn't get here in this case. (2) an IO error occurred (why doesn't it throw an exception then???)
391              */
392 
393             /*
394              * [jdcasey] (2) is apparently happening to me, as this is killing one of my tests... this is affecting the
395              * assembly plugin, fwiw. I will initialize the newfiles array as zero-length for now. NOTE: I can't find
396              * the problematic code, as it appears to come from a native method in UnixFileSystem...
397              */
398             /*
399              * [bentmann] A null array will also be returned from list() on NTFS when dir refers to a soft link or
400              * junction point whose target is not existent.
401              */
402             newfiles = new String[0];
403 
404             // throw new IOException( "IO error scanning directory " + dir.getAbsolutePath() );
405         }
406 
407         if ( !followSymlinks )
408         {
409             ArrayList<String> noLinks = new ArrayList<String>();
410             for ( String newfile : newfiles )
411             {
412                 try
413                 {
414                     if ( isParentSymbolicLink( dir, newfile ) )
415                     {
416                         String name = vpath + newfile;
417                         File file = new File( dir, newfile );
418                         if ( file.isDirectory() )
419                         {
420                             dirsExcluded.addElement( name );
421                         }
422                         else
423                         {
424                             filesExcluded.addElement( name );
425                         }
426                     }
427                     else
428                     {
429                         noLinks.add( newfile );
430                     }
431                 }
432                 catch ( IOException ioe )
433                 {
434                     String msg = "IOException caught while checking " + "for links, couldn't get canonical path!";
435                     // will be caught and redirected to Ant's logging system
436                     System.err.println( msg );
437                     noLinks.add( newfile );
438                 }
439             }
440             newfiles = noLinks.toArray( new String[noLinks.size()] );
441         }
442 
443         for ( String newfile : newfiles )
444         {
445             String name = vpath + newfile;
446             String[] tokenizedName = MatchPattern.tokenizePathToString( name, File.separator );
447             File file = new File( dir, newfile );
448             if ( file.isDirectory() )
449             {
450 
451                 if ( isIncluded( name, tokenizedName ) )
452                 {
453                     if ( !isExcluded( name, tokenizedName ) )
454                     {
455                         if ( isSelected( name, file ) )
456                         {
457                             dirsIncluded.addElement( name );
458                             if ( fast )
459                             {
460                                 scandir( file, name + File.separator, fast );
461                             }
462                         }
463                         else
464                         {
465                             everythingIncluded = false;
466                             dirsDeselected.addElement( name );
467                             if ( fast && couldHoldIncluded( name ) )
468                             {
469                                 scandir( file, name + File.separator, fast );
470                             }
471                         }
472 
473                     }
474                     else
475                     {
476                         everythingIncluded = false;
477                         dirsExcluded.addElement( name );
478                         if ( fast && couldHoldIncluded( name ) )
479                         {
480                             scandir( file, name + File.separator, fast );
481                         }
482                     }
483                 }
484                 else
485                 {
486                     everythingIncluded = false;
487                     dirsNotIncluded.addElement( name );
488                     if ( fast && couldHoldIncluded( name ) )
489                     {
490                         scandir( file, name + File.separator, fast );
491                     }
492                 }
493                 if ( !fast )
494                 {
495                     scandir( file, name + File.separator, fast );
496                 }
497             }
498             else if ( file.isFile() )
499             {
500                 if ( isIncluded( name, tokenizedName ) )
501                 {
502                     if ( !isExcluded( name, tokenizedName ) )
503                     {
504                         if ( isSelected( name, file ) )
505                         {
506                             filesIncluded.addElement( name );
507                         }
508                         else
509                         {
510                             everythingIncluded = false;
511                             filesDeselected.addElement( name );
512                         }
513                     }
514                     else
515                     {
516                         everythingIncluded = false;
517                         filesExcluded.addElement( name );
518                     }
519                 }
520                 else
521                 {
522                     everythingIncluded = false;
523                     filesNotIncluded.addElement( name );
524                 }
525             }
526         }
527     }
528 
529     /**
530      * Tests whether a name should be selected.
531      *
532      * @param name the filename to check for selecting
533      * @param file the java.io.File object for this filename
534      * @return <code>false</code> when the selectors says that the file should not be selected, <code>true</code>
535      *         otherwise.
536      */
537     protected boolean isSelected( String name, File file )
538     {
539         return true;
540     }
541 
542     /**
543      * Returns the names of the files which matched at least one of the include patterns and none of the exclude
544      * patterns. The names are relative to the base directory.
545      *
546      * @return the names of the files which matched at least one of the include patterns and none of the exclude
547      *         patterns.
548      */
549     public String[] getIncludedFiles()
550     {
551         String[] files = new String[filesIncluded.size()];
552         filesIncluded.copyInto( files );
553         return files;
554     }
555 
556     /**
557      * Returns the names of the files which matched none of the include patterns. The names are relative to the base
558      * directory. This involves performing a slow scan if one has not already been completed.
559      *
560      * @return the names of the files which matched none of the include patterns.
561      * @see #slowScan
562      */
563     public String[] getNotIncludedFiles()
564     {
565         slowScan();
566         String[] files = new String[filesNotIncluded.size()];
567         filesNotIncluded.copyInto( files );
568         return files;
569     }
570 
571     /**
572      * Returns the names of the files which matched at least one of the include patterns and at least one of the exclude
573      * patterns. The names are relative to the base directory. This involves performing a slow scan if one has not
574      * already been completed.
575      *
576      * @return the names of the files which matched at least one of the include patterns and at at least one of the
577      *         exclude patterns.
578      * @see #slowScan
579      */
580     public String[] getExcludedFiles()
581     {
582         slowScan();
583         String[] files = new String[filesExcluded.size()];
584         filesExcluded.copyInto( files );
585         return files;
586     }
587 
588     /**
589      * <p>
590      * Returns the names of the files which were selected out and therefore not ultimately included.
591      * </p>
592      * <p/>
593      * <p>
594      * The names are relative to the base directory. This involves performing a slow scan if one has not already been
595      * completed.
596      * </p>
597      *
598      * @return the names of the files which were deselected.
599      * @see #slowScan
600      */
601     public String[] getDeselectedFiles()
602     {
603         slowScan();
604         String[] files = new String[filesDeselected.size()];
605         filesDeselected.copyInto( files );
606         return files;
607     }
608 
609     /**
610      * Returns the names of the directories which matched at least one of the include patterns and none of the exclude
611      * patterns. The names are relative to the base directory.
612      *
613      * @return the names of the directories which matched at least one of the include patterns and none of the exclude
614      *         patterns.
615      */
616     public String[] getIncludedDirectories()
617     {
618         String[] directories = new String[dirsIncluded.size()];
619         dirsIncluded.copyInto( directories );
620         return directories;
621     }
622 
623     /**
624      * Returns the names of the directories which matched none of the include patterns. The names are relative to the
625      * base directory. This involves performing a slow scan if one has not already been completed.
626      *
627      * @return the names of the directories which matched none of the include patterns.
628      * @see #slowScan
629      */
630     public String[] getNotIncludedDirectories()
631     {
632         slowScan();
633         String[] directories = new String[dirsNotIncluded.size()];
634         dirsNotIncluded.copyInto( directories );
635         return directories;
636     }
637 
638     /**
639      * Returns the names of the directories which matched at least one of the include patterns and at least one of the
640      * exclude patterns. The names are relative to the base directory. This involves performing a slow scan if one has
641      * not already been completed.
642      *
643      * @return the names of the directories which matched at least one of the include patterns and at least one of the
644      *         exclude patterns.
645      * @see #slowScan
646      */
647     public String[] getExcludedDirectories()
648     {
649         slowScan();
650         String[] directories = new String[dirsExcluded.size()];
651         dirsExcluded.copyInto( directories );
652         return directories;
653     }
654 
655     /**
656      * <p>
657      * Returns the names of the directories which were selected out and therefore not ultimately included.
658      * </p>
659      * <p/>
660      * <p>
661      * The names are relative to the base directory. This involves performing a slow scan if one has not already been
662      * completed.
663      * </p>
664      *
665      * @return the names of the directories which were deselected.
666      * @see #slowScan
667      */
668     public String[] getDeselectedDirectories()
669     {
670         slowScan();
671         String[] directories = new String[dirsDeselected.size()];
672         dirsDeselected.copyInto( directories );
673         return directories;
674     }
675 
676     /**
677      * Checks whether a given file is a symbolic link.
678      * <p/>
679      * <p>
680      * It doesn't really test for symbolic links but whether the canonical and absolute paths of the file are identical
681      * - this may lead to false positives on some platforms.
682      * </p>
683      *
684      * @param parent the parent directory of the file to test
685      * @param name the name of the file to test.
686      * @return true if it's a symbolic link
687      * @throws java.io.IOException .
688      * @since Ant 1.5
689      */
690     public boolean isSymbolicLink( File parent, String name )
691         throws IOException
692     {
693         if ( Java7Detector.isJava7() )
694         {
695             return NioFiles.isSymbolicLink( new File( parent, name ) );
696         }
697         File resolvedParent = new File( parent.getCanonicalPath() );
698         File toTest = new File( resolvedParent, name );
699         return !toTest.getAbsolutePath().equals( toTest.getCanonicalPath() );
700     }
701 
702     /**
703      * Checks whether the parent of this file is a symbolic link.
704      * <p/>
705      * <p>
706      * For java versions prior to 7 It doesn't really test for symbolic links but whether the canonical and absolute
707      * paths of the file are identical - this may lead to false positives on some platforms.
708      * </p>
709      *
710      * @param parent the parent directory of the file to test
711      * @param name the name of the file to test.
712      * @return true if it's a symbolic link
713      * @throws java.io.IOException .
714      * @since Ant 1.5
715      */
716     public boolean isParentSymbolicLink( File parent, String name )
717         throws IOException
718     {
719         if ( Java7Detector.isJava7() )
720         {
721             return NioFiles.isSymbolicLink( parent );
722         }
723         File resolvedParent = new File( parent.getCanonicalPath() );
724         File toTest = new File( resolvedParent, name );
725         return !toTest.getAbsolutePath().equals( toTest.getCanonicalPath() );
726     }
727 }