/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.tools.zip;
import static org.apache.tools.zip.ZipConstants.DWORD;
import static org.apache.tools.zip.ZipConstants.SHORT;
import static org.apache.tools.zip.ZipConstants.WORD;
import static org.apache.tools.zip.ZipConstants.ZIP64_MAGIC;
import static org.apache.tools.zip.ZipConstants.ZIP64_MAGIC_SHORT;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException;
/**
 * Replacement for {@code java.util.zip.ZipFile}.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self-extracting
 * archives. Furthermore it returns instances of
 * {@code org.apache.tools.zip.ZipEntry} instead of
 * {@code java.util.zip.ZipEntry}.</p>
 *
 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would
 * have to reimplement all methods anyway. Like
 * {@code java.util.zip.ZipFile}, it uses RandomAccessFile under the
 * covers and supports compressed and uncompressed entries. As of
 * Apache Ant 1.9.0 it also transparently supports Zip64
 * extensions and thus individual entries and archives larger than 4
 * GB or with more than 65536 entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * {@code java.util.zip.ZipFile}, with a couple of exceptions:
 *
 *
 * {@code org.apache.tools.zip.ZipEntry} instances.
 *
 * For a list of possible values see
 * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html.
 * Defaults to the platform's default character encoding.
*/ private final String encoding; /** * The zip encoding to use for filenames and the file comment. */ private final ZipEncoding zipEncoding; /** * File name of actual source. */ private final String archiveName; /** * The actual data source. */ private final RandomAccessFile archive; /** * Whether to look for and use Unicode extra fields. */ private final boolean useUnicodeExtraFields; /** * Whether the file is closed. */ private volatile boolean closed; // cached buffers private final byte[] DWORD_BUF = new byte[DWORD]; private final byte[] WORD_BUF = new byte[WORD]; private final byte[] CFH_BUF = new byte[CFH_LEN]; private final byte[] SHORT_BUF = new byte[SHORT]; /** * Opens the given file for reading, assuming the platform's * native encoding for file names. * * @param f the archive. * * @throws IOException if an error occurs while reading the file. */ public ZipFile(final File f) throws IOException { this(f, null); } /** * Opens the given file for reading, assuming the platform's * native encoding for file names. * * @param name name of the archive. * * @throws IOException if an error occurs while reading the file. */ public ZipFile(final String name) throws IOException { this(new File(name), null); } /** * Opens the given file for reading, assuming the specified * encoding for file names, scanning unicode extra fields. * * @param name name of the archive. * @param encoding the encoding to use for file names, use null * for the platform's default encoding * * @throws IOException if an error occurs while reading the file. */ public ZipFile(final String name, final String encoding) throws IOException { this(new File(name), encoding, true); } /** * Opens the given file for reading, assuming the specified * encoding for file names and scanning for unicode extra fields. * * @param f the archive. * @param encoding the encoding to use for file names, use null * for the platform's default encoding * * @throws IOException if an error occurs while reading the file. 
*/
    public ZipFile(final File f, final String encoding) throws IOException {
        // always scan for Unicode extra fields when the caller does not say otherwise
        this(f, encoding, true);
    }

    /**
     * Opens the given file for reading, assuming the specified
     * encoding for file names.
     *
     * <p>Records the archive's absolute path, resolves the
     * {@code ZipEncoding} for {@code encoding} via
     * {@code ZipEncodingHelper} and opens the file read-only through
     * a {@code RandomAccessFile}.</p>
     *
     * @param f the archive.
     * @param encoding the encoding to use for file names, use null
     * for the platform's default encoding
     * @param useUnicodeExtraFields whether to use InfoZIP Unicode
     * Extra Fields (if present) to set the file names.
     *
     * @throws IOException if an error occurs while reading the file.
     */
    public ZipFile(final File f, final String encoding,
                   final boolean useUnicodeExtraFields)
        throws IOException {
        this.archiveName = f.getAbsolutePath();
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.useUnicodeExtraFields = useUnicodeExtraFields;
        archive = new RandomAccessFile(f, "r");
        boolean success = false;
        try {
            // NOTE(review): the rest of this constructor body appears to be
            // missing from this copy of the file - the text below runs
            // straight into Javadoc prose for a later method. Restore it
            // from version control before building.
            final MapEntries will be returned in the same order they appear * within the archive's central directory.
* * @return all entries as {@link ZipEntry} instances */ public EnumerationEntries will be returned in the same order their contents * appear within the archive.
* * @return all entries as {@link ZipEntry} instances * * @since Ant 1.9.0 */ public EnumerationIf multiple entries with the same name exist the first entry * in the archive's central directory by that name is * returned.
* * @param name name of the entry. * @return the ZipEntry corresponding to the given name - or * {@code null} if not present. */ public ZipEntry getEntry(final String name) { final LinkedListMay return false if it is set up to use encryption or a * compression method that hasn't been implemented yet.
*/ public boolean canReadEntryData(final ZipEntry ze) { return ZipUtil.canHandleEntryData(ze); } /** * Returns an InputStream for reading the contents of the given entry. * * @param ze the entry to get the stream for. * @return a stream to read the entry from. * @throws IOException if unable to create an input stream from the zipentry * @throws ZipException if the zipentry uses an unsupported feature */ public InputStream getInputStream(final ZipEntry ze) throws IOException, ZipException { if (!(ze instanceof Entry)) { return null; } // cast valididty is checked just above final OffsetEntry offsetEntry = ((Entry) ze).getOffsetEntry(); ZipUtil.checkRequestedFeatures(ze); final long start = offsetEntry.dataOffset; final BoundedInputStream bis = new BoundedInputStream(start, ze.getCompressedSize()); switch (ze.getMethod()) { case ZipEntry.STORED: return bis; case ZipEntry.DEFLATED: bis.addDummy(); final Inflater inflater = new Inflater(true); return new InflaterInputStream(bis, inflater) { @Override public void close() throws IOException { super.close(); inflater.end(); } }; default: throw new ZipException("Found unsupported compression method " + ze.getMethod()); } } /** * Ensures that the close method of this zipfile is called when * there are no more references to it. * @see #close() */ @Override protected void finalize() throws Throwable { try { if (!closed) { System.err.println("Cleaning up unclosed ZipFile for archive " + archiveName); close(); } } finally { super.finalize(); } } /** * Length of a "central directory" entry structure without file * name, extra fields or comment. 
*/
    private static final int CFH_LEN =
        /* version made by                 */ SHORT
        /* version needed to extract       */ + SHORT
        /* general purpose bit flag        */ + SHORT
        /* compression method              */ + SHORT
        /* last mod file time              */ + SHORT
        /* last mod file date              */ + SHORT
        /* crc-32                          */ + WORD
        /* compressed size                 */ + WORD
        /* uncompressed size               */ + WORD
        /* filename length                 */ + SHORT
        /* extra field length              */ + SHORT
        /* file comment length             */ + SHORT
        /* disk number start               */ + SHORT
        /* internal file attributes        */ + SHORT
        /* external file attributes        */ + WORD
        /* relative offset of local header */ + WORD;

    /**
     * Numeric value of {@code ZipOutputStream.CFH_SIG} as computed by
     * {@code ZipLong.getValue}.
     */
    private static final long CFH_SIG =
        ZipLong.getValue(ZipOutputStream.CFH_SIG);

    /**
     * Reads the central directory of the given archive and populates
     * the internal tables with ZipEntry instances.
     *
     * <p>The ZipEntrys will know all data that can be obtained from
     * the central directory alone, but not the data that requires the
     * local file header or additional data to be read.</p>
* * @return a map of zipentries that didn't have the language * encoding flag set when read. */ private MapEnsures the Zip64 extra either knows both compressed and * uncompressed size or neither of both as the internal logic in * ExtraFieldUtils forces the field to create local header data * even if they are never used - and here a field with only one * size would be invalid.
*/ private void setSizesAndOffsetFromZip64Extra(final ZipEntry ze, final OffsetEntry offset, final int diskStart) throws IOException { final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) ze.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); if (z64 != null) { final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC; final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC; final boolean hasRelativeHeaderOffset = offset.headerOffset == ZIP64_MAGIC; z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, diskStart == ZIP64_MAGIC_SHORT); if (hasUncompressedSize) { ze.setSize(z64.getSize().getLongValue()); } else if (hasCompressedSize) { z64.setSize(new ZipEightByteInteger(ze.getSize())); } if (hasCompressedSize) { ze.setCompressedSize(z64.getCompressedSize().getLongValue()); } else if (hasUncompressedSize) { z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize())); } if (hasRelativeHeaderOffset) { offset.headerOffset = z64.getRelativeHeaderOffset().getLongValue(); } } } /** * Length of the "End of central directory record" - which is * supposed to be the last structure of the archive - without file * comment. */ private static final int MIN_EOCD_SIZE = /* end of central dir signature */ WORD /* number of this disk */ + SHORT /* number of the disk with the */ /* start of the central directory */ + SHORT /* total number of entries in */ /* the central dir on this disk */ + SHORT /* total number of entries in */ /* the central dir */ + SHORT /* size of the central directory */ + WORD /* offset of start of central */ /* directory with respect to */ /* the starting disk number */ + WORD /* zipfile comment length */ + SHORT; /** * Maximum length of the "End of central directory record" with a * file comment. 
*/
    private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
        /* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;

    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "End of central directory
     * record" relative to the start of the "End of central directory
     * record".
     */
    private static final int CFD_LOCATOR_OFFSET =
        /* end of central dir signature    */ WORD
        /* number of this disk             */ + SHORT
        /* number of the disk with the     */
        /* start of the central directory  */ + SHORT
        /* total number of entries in      */
        /* the central dir on this disk    */ + SHORT
        /* total number of entries in      */
        /* the central dir                 */ + SHORT
        /* size of the central directory   */ + WORD;

    /**
     * Length of the "Zip64 end of central directory locator" - which
     * should be right in front of the "end of central directory
     * record" if one is present at all.
     */
    private static final int ZIP64_EOCDL_LENGTH =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD
        /* relative offset of the zip64         */
        /* end of central directory record      */ + DWORD
        /* total number of disks                */ + WORD;

    /**
     * Offset of the field that holds the location of the "Zip64 end
     * of central directory record" inside the "Zip64 end of central
     * directory locator" relative to the start of the "Zip64 end of
     * central directory locator".
     */
    private static final int ZIP64_EOCDL_LOCATOR_OFFSET =
        /* zip64 end of central dir locator sig */ WORD
        /* number of the disk with the          */
        /* start of the zip64 end of            */
        /* central directory                    */ + WORD;

    /**
     * Offset of the field that holds the location of the first
     * central directory entry inside the "Zip64 end of central
     * directory record" relative to the start of the "Zip64 end of
     * central directory record".
*/ private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = /* zip64 end of central dir */ /* signature */ WORD /* size of zip64 end of central */ /* directory record */ + DWORD /* version made by */ + SHORT /* version needed to extract */ + SHORT /* number of this disk */ + WORD /* number of the disk with the */ /* start of the central directory */ + WORD /* total number of entries in the */ /* central directory on this disk */ + DWORD /* total number of entries in the */ /* central directory */ + DWORD /* size of the central directory */ + DWORD; /** * Searches for either the "Zip64 end of central directory * locator" or the "End of central dir record", parses * it and positions the stream at the first central directory * record. */ private void positionAtCentralDirectory() throws IOException { positionAtEndOfCentralDirectoryRecord(); boolean found = false; final boolean searchedForZip64EOCD = archive.getFilePointer() > ZIP64_EOCDL_LENGTH; if (searchedForZip64EOCD) { archive.seek(archive.getFilePointer() - ZIP64_EOCDL_LENGTH); archive.readFully(WORD_BUF); found = Arrays.equals(ZipOutputStream.ZIP64_EOCD_LOC_SIG, WORD_BUF); } if (!found) { // not a ZIP64 archive if (searchedForZip64EOCD) { skipBytes(ZIP64_EOCDL_LENGTH - WORD); } positionAtCentralDirectory32(); } else { positionAtCentralDirectory64(); } } /** * Parses the "Zip64 end of central directory locator", * finds the "Zip64 end of central directory record" using the * parsed information, parses that and positions the stream at the * first central directory record. 
*/ private void positionAtCentralDirectory64() throws IOException { skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - WORD /* signature has already been read */); archive.readFully(DWORD_BUF); archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); archive.readFully(WORD_BUF); if (!Arrays.equals(WORD_BUF, ZipOutputStream.ZIP64_EOCD_SIG)) { throw new ZipException("archive's ZIP64 end of central " + "directory locator is corrupt."); } skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - WORD /* signature has already been read */); archive.readFully(DWORD_BUF); archive.seek(ZipEightByteInteger.getLongValue(DWORD_BUF)); } /** * Searches for the "End of central dir record", parses * it and positions the stream at the first central directory * record. */ private void positionAtCentralDirectory32() throws IOException { skipBytes(CFD_LOCATOR_OFFSET); archive.readFully(WORD_BUF); archive.seek(ZipLong.getValue(WORD_BUF)); } /** * Searches for the and positions the stream at the start of the * "End of central dir record". */ private void positionAtEndOfCentralDirectoryRecord() throws IOException { final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipOutputStream.EOCD_SIG); if (!found) { throw new ZipException("archive is not a ZIP archive"); } } /** * Searches the archive backwards from minDistance to maxDistance * for the given signature, positions the RandomaccessFile right * at the signature if it has been found. 
*/ private boolean tryToLocateSignature(final long minDistanceFromEnd, final long maxDistanceFromEnd, final byte[] sig) throws IOException { boolean found = false; long off = archive.length() - minDistanceFromEnd; final long stopSearching = Math.max(0L, archive.length() - maxDistanceFromEnd); if (off >= 0) { for (; off >= stopSearching; off--) { archive.seek(off); int curr = archive.read(); if (curr == -1) { break; } if (curr == sig[POS_0]) { curr = archive.read(); if (curr == sig[POS_1]) { curr = archive.read(); if (curr == sig[POS_2]) { curr = archive.read(); if (curr == sig[POS_3]) { found = true; break; } } } } } } if (found) { archive.seek(off); } return found; } /** * Skips the given number of bytes or throws an EOFException if * skipping failed. */ private void skipBytes(final int count) throws IOException { int totalSkipped = 0; while (totalSkipped < count) { final int skippedNow = archive.skipBytes(count - totalSkipped); if (skippedNow <= 0) { throw new EOFException(); } totalSkipped += skippedNow; } } /** * Number of bytes in local file header up to the "length of * filename" entry. */ private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = /* local file header signature */ WORD /* version needed to extract */ + SHORT /* general purpose bit flag */ + SHORT /* compression method */ + SHORT /* last mod file time */ + SHORT /* last mod file date */ + SHORT /* crc-32 */ + WORD /* compressed size */ + WORD /* uncompressed size */ + WORD; /** * Walks through all recorded entries and adds the data available * from the local file header. * *Also records the offsets for the data to read from the * entries.
*/ private void resolveLocalFileHeaderData(final MapWon't return any meaningful results if one of the entries * isn't part of the archive at all.
* * @since Ant 1.9.0 */ private final Comparator