Jar Search

Frustrated by all the time wasted expanding and repacking J2EE enterprise application archives (ear files) looking for bean classes and the like, I wrote this small tool for searching an archive recursively for files whose paths match a regular expression.

Here's a sample use:

$ java -jar jarsearch.jar -a CMRouter New_Project.ear
Searching for 'CMRouter' ...
There are 3 matches:
New_Project.ear>New_Project.jar>frameworkImpl/model/router/CMRouterBean.class
New_Project.ear>New_Project.jar>frameworkImpl/model/router/CMRouterLocal.class
New_Project.ear>New_Project.jar>frameworkImpl/model/router/CMRouterLocalHome.class

The Jar Search utility consists of one class:

package net.das.jarsearch;

import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.jar.JarInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import gnu.regexp.RE;
import gnu.regexp.REException;


/**
 * Searches a set of files, directories and archives for files whose path names match
 * a given regular expression.  The search may optionally recurse into directories
 * and into archives nested within archives.
 *
 * <p>
 * Names ending in .jar, .ear, .zip, .rar or .war (in any letter case) are treated as
 * archives.  Matches found inside archives are reported with the archive path and the
 * entry name separated by a '&gt;' character.
 * </p>
 *
 * @author <a href="mailto:doug@dseifert.net">Doug Seifert</a>
 */
public class JarSearch {
    /** File name suffixes that identify an archive; compared case-insensitively. */
    private static final String[] ARCHIVE_EXTENSIONS = {".jar", ".ear", ".zip", ".rar", ".war"};

    /** The files, directories and archives handed to the constructor. */
    private final Set mInitialFiles;
    /** The expanded set of files examined by the most recent call to execute(). */
    private Set mFiles;
    /** The expression every candidate path or entry name is matched against. */
    private final RE mSearchRE;
    private boolean mRecurseDirectories = false;
    private boolean mRecurseArchives = false;


    /**
     * Create a search object that will search a set of directories, files and archives
     * for files whose path names match the provided regular expression.  The set
     * may contain files, in which case the search is made against the file itself.  The
     * set may also contain directories, in which case the directory and optionally all
     * its contents are searched.  Finally, the set may contain jar archives.  By default,
     * only archive contents are searched.  Optionally, archives within archives (archive
     * recursion) may be searched.
     *
     * @param aSearchTerm A string search expression that is turned into a
     *         gnu.regexp.RE object by invoking the RE(Object) constructor.
     * @param lDirectories The set of java.io.File objects (files, directories and
     *         archives) to search.
     * @throws REException If the search term can't be turned into a valid RE object.
     */
    public JarSearch(String aSearchTerm, Set lDirectories) throws REException {
        mSearchRE = new RE(aSearchTerm);
        mInitialFiles = lDirectories;
    }

    /**
     * Create a search object that will search a set of directories, files and archives
     * for files whose path names match the provided regular expression.  The set
     * may contain files, in which case the search is made against the file itself.  The
     * set may also contain directories, in which case the directory and optionally all
     * its contents are searched.  Finally, the set may contain jar archives.  By default,
     * only archive contents are searched.  Optionally, archives within archives (archive
     * recursion) may be searched.
     *
     * @param aSearchRE A gnu.regexp.RE used to match file path names against.
     * @param lDirectories The set of java.io.File objects (files, directories and
     *         archives) to search.
     */
    public JarSearch(RE aSearchRE, Set lDirectories) {
        mSearchRE = aSearchRE;
        mInitialFiles = lDirectories;
    }

    /**
     * The main program for the JarSearch class.  This program may be invoked as follows:
     * <code>java net.das.jarsearch.JarSearch [-r] [-a] {regexpstring} [file ...]</code>
     * <ul>
     * <li>If the -r flag is provided, any directories provided as arguments will be recursed.</li>
     * <li>If the -a flag is provided, any archives provided as arguments, or any archives found
     * as a result of directory recursion, will be recursed</li>
     * <li>If no files are provided, the current working directory is searched.</li>
     * </ul>
     *
     * <p>
     * Archive recursion means that archives nested with archives to any level will be searched.
     * </p>
     * <p>
     * This method will call System.exit(0) if the search was performed without an error.  If an
     * error occurs (for example, because the regexpstring can't be parsed into a valid
     * gnu.regexp.RE object), System.exit(1) will be called.  System.exit(2)
     * will be called in the event of a usage error.
     * </p>
     * <p>
     * Example:<br>
     * java net.das.jarsearch.JarSearch -a Bean foo.ear<br>
     * Will list all files whose names contain the substring 'Bean' in the given ear archive.
     * Module archives that might be contained in the ear will also be searched.
     * </p>
     *
     * @param args The command line arguments
     */
    public static void main(String[] args) {
        int lArgIndex = 0;
        boolean lRecurseDirs = false;
        boolean lRecurseArchives = false;

        // Consume leading option flags.  The bounds check matters: a command line made up
        // of nothing but flags must end in the usage message, not an array index error.
        while (lArgIndex < args.length && args[lArgIndex].startsWith("-")) {
            String lFlag = args[lArgIndex++];
            if ("-a".equals(lFlag)) {
                lRecurseArchives = true;
            } else if ("-r".equals(lFlag)) {
                lRecurseDirs = true;
            } else {
                // Unrecognized flags are still skipped, but no longer silently.
                System.err.println("Ignoring unknown option: " + lFlag);
            }
        }

        // The regular expression is the only mandatory argument.
        if (lArgIndex >= args.length) {
            printUsage();
            System.exit(2);
        }

        try {
            String lSearchTerm = args[lArgIndex++];

            Set lDirectories = new TreeSet();
            for (int i = lArgIndex; i < args.length; ++i) {
                lDirectories.add(new File(args[i]));
            }
            if (lDirectories.isEmpty()) {
                // As promised by the usage text: default to the current working directory.
                lDirectories.add(new File("."));
            }

            JarSearch lSearch = new JarSearch(lSearchTerm, lDirectories);
            lSearch.setRecurseArchives(lRecurseArchives);
            lSearch.setRecurseDirectories(lRecurseDirs);

            System.out.println("Searching for '" + lSearchTerm + "' ...");
            List lResults = lSearch.execute();

            System.out.println("There are " + lResults.size() + " matches:");
            Iterator i = lResults.iterator();
            while (i.hasNext()) {
                String lMatch = (String) i.next();
                System.out.println(lMatch);
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }

    /**
     * Print the command line usage summary to standard error.
     */
    private static void printUsage() {
        // getName() yields the bare class name; concatenating the Class object itself
        // would print "class net.das.jarsearch.JarSearch".
        System.err.println("Usage: java " + JarSearch.class.getName()
            + " [-r] [-a] <regexpstring> [file ...]");
        System.err.println("    <regexpstring> is a string that can be parsed into a "
            + "valid gnu.regexp.RE object.");
        System.err.println("    zero or more directories may be specified. If none are specified,"
            + " the current working directory is searched.");
    }

    /**
     * Flag the search to recurse directories.
     *
     * @param aFlag Turn on or off directory recursion.
     */
    public void setRecurseDirectories(boolean aFlag) {
        mRecurseDirectories = aFlag;
    }

    /**
     * Flag the search to recurse archives.  If this is true,
     * archives within archives will be searched.
     *
     * @param aFlag Turn on or off archive recursion.
     */
    public void setRecurseArchives(boolean aFlag) {
        mRecurseArchives = aFlag;
    }

    /**
     * Perform the search and return a List of search results.  The result is a list
     * of String objects of the form:
     * <pre>
     * path/that/matched
     * archive.jar>that/has/a/matching/file
     * an/archive.jar>within/an/archive.jar>that/has/a/match
     * ...
     * </pre>
     * An archive that cannot be read is reported on standard error and skipped; matches
     * collected from it before the failure are kept.
     *
     * @return A list of matches of the search expression
     */
    public List execute() {

        // Find all files first. Returns the initial files and, if directory
        // recursion is enabled, everything below the initial directories.
        mFiles = findAllFiles(mInitialFiles);

        List lMatches = new ArrayList();
        Iterator i = mFiles.iterator();
        while (i.hasNext()) {
            File lFile = (File) i.next();

            // A directory that merely carries an archive-like name (an exploded
            // "app.war/", say) cannot be opened as a jar; match it by path instead.
            if (!lFile.isDirectory() && isArchive(lFile.getName())) {
                try {
                    checkArchiveFile(lFile, lMatches);
                } catch (IOException ioe) {
                    // Diagnostics go to stderr so they never mix with results on stdout.
                    System.err.println("Error checking archive " + lFile.getPath() + ": " + ioe);
                }
            } else if (mSearchRE.getMatch(lFile.getPath()) != null) {
                // It is a regular file or directory, just match the path name against the RE
                lMatches.add(lFile.getPath());
            }
        }
        return lMatches;
    }


    /**
     * Search the entries of an archive that lives on the file system.
     *
     * @param aFile The archive to open.
     * @param aMatches The list that matching entry paths are appended to.
     * @throws IOException If the archive, or an archive nested in it, can't be read.
     */
    private void checkArchiveFile(File aFile, List aMatches)
        throws IOException {
        JarFile lJar = new JarFile(aFile);
        try {
            // Go through the jar entries looking for matches
            Enumeration lEntries = lJar.entries();
            while (lEntries.hasMoreElements()) {
                ZipEntry lEntry = (ZipEntry) lEntries.nextElement();
                checkEntry(lJar.getName(), lJar, lEntry, aMatches);
            }
        } finally {
            lJar.close();
        }
    }

    /**
     * Search the entries of an archive that is only available as a stream because it is
     * nested inside another archive.  The stream is left open for the caller to close.
     *
     * @param aPrefix The '&gt;'-separated path of the archive being read.
     * @param aStream The stream positioned at the start of the nested archive.
     * @param aMatches The list that matching entry paths are appended to.
     * @throws IOException If the stream can't be read.
     */
    private void checkArchiveStream(String aPrefix, ZipInputStream aStream, List aMatches) throws IOException {
        // A plain ZipInputStream is used rather than a JarInputStream because the latter
        // consumes META-INF/ and META-INF/MANIFEST.MF in its constructor, which would make
        // those entries impossible to find.
        ZipEntry lEntry;
        while ((lEntry = aStream.getNextEntry()) != null) {
            try {
                checkEntry(aPrefix, null, lEntry, aMatches);

                if (mRecurseArchives && isArchive(lEntry.getName())) {
                    // We have an archive within an archive: layer a new zip stream over the
                    // current entry's data.  We don't want to close this stream, because
                    // it is a substream of a larger open enclosing stream.
                    ZipInputStream lNested = new ZipInputStream(aStream);
                    checkArchiveStream(aPrefix + ">" + lEntry.getName(), lNested, aMatches);
                }
            } finally {
                // Skips whatever is left of the entry's data, so no explicit
                // read-and-discard loop is needed to reach the next entry.
                aStream.closeEntry();
            }
        }
    }

    /**
     * Match a single archive entry against the search expression and, when the entry is
     * itself an archive inside a file system archive and archive recursion is enabled,
     * search its contents as well.
     *
     * @param aPrefix The '&gt;'-separated path of the archive that contains the entry.
     * @param aOriginalFile The file system archive the entry was read from, or null when
     *         the entry comes from a stream (the caller then handles recursion itself).
     * @param aEntry The entry to examine.
     * @param aMatches The list that matching entry paths are appended to.
     * @throws IOException If a nested archive can't be read.
     */
    private void checkEntry(String aPrefix, JarFile aOriginalFile, ZipEntry aEntry, List aMatches) throws IOException {
        String lName = aEntry.getName();

        // Every entry name is matched, nested archives included, so that turning on
        // archive recursion only ever adds results and never hides the archive itself.
        if (mSearchRE.getMatch(lName) != null) {
            aMatches.add(aPrefix + ">" + lName);
        }

        // If we are looking at an archive within a top-level (on the filesystem) archive,
        // open a stream and look inside it if the -a flag was specified.
        if (aOriginalFile != null && mRecurseArchives && isArchive(lName)) {
            ZipInputStream lStream = new ZipInputStream(aOriginalFile.getInputStream(aEntry));
            try {
                checkArchiveStream(aPrefix + ">" + lName, lStream, aMatches);
            } finally {
                lStream.close();
            }
        }
    }

    /**
     * Tell whether a file or entry name carries one of the known archive extensions.
     *
     * @param aName The file name or path to test.
     * @return true if the name ends in an archive extension, ignoring letter case.
     */
    private static boolean isArchive(String aName) {
        for (int i = 0; i < ARCHIVE_EXTENSIONS.length; ++i) {
            String lExtension = ARCHIVE_EXTENSIONS[i];
            // regionMatches folds case per character, independent of the default locale,
            // and simply returns false when the name is shorter than the extension.
            if (aName.regionMatches(true, aName.length() - lExtension.length(),
                    lExtension, 0, lExtension.length())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Expand the initial set of files.  Every initial file is kept; when directory
     * recursion is enabled, all non-directory files below each initial directory are added.
     *
     * @param aBaseDirs The set of java.io.File objects to start from.
     * @return A sorted set holding the initial files and everything found below them.
     */
    private Set findAllFiles(Set aBaseDirs) {
        Set lFiles = new TreeSet();
        Iterator i = aBaseDirs.iterator();
        while (i.hasNext()) {
            File lBaseDir = (File) i.next();
            if (mRecurseDirectories && lBaseDir.isDirectory()) {
                collectFiles(lBaseDir, lFiles);
            }
            lFiles.add(lBaseDir);
        }
        return lFiles;
    }


    /**
     * Add every non-directory file below a directory to a set, descending into
     * subdirectories.  Archives and regular files are gathered in this one pass.
     *
     * @param aDir The directory to walk.
     * @param aFiles The set the files are added to.
     */
    private void collectFiles(File aDir, Set aFiles) {
        File[] lChildren = aDir.listFiles();
        if (lChildren == null) {
            // listFiles() returns null when the directory can't be read
            return;
        }
        for (int i = 0; i < lChildren.length; ++i) {
            if (lChildren[i].isDirectory()) {
                collectFiles(lChildren[i], aFiles);
            } else {
                aFiles.add(lChildren[i]);
            }
        }
    }
}

To compile it, you will need the GNU regexp jar, a copy of which can be found in the tarball here: gzipped tar archive (33K)

The above archive contains an ant based project with full source. You are welcome to use this code as you see fit. Just untar the archive and run "ant dist" to produce jarsearch.jar in the project directory.