diff options
Diffstat (limited to 'framework/src/ant/apache-ant-1.9.6/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java')
-rw-r--r-- | framework/src/ant/apache-ant-1.9.6/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java | 1580 |
1 files changed, 1580 insertions, 0 deletions
diff --git a/framework/src/ant/apache-ant-1.9.6/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java b/framework/src/ant/apache-ant-1.9.6/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java new file mode 100644 index 00000000..01e23424 --- /dev/null +++ b/framework/src/ant/apache-ant-1.9.6/src/main/org/apache/tools/bzip2/CBZip2OutputStream.java @@ -0,0 +1,1580 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/* + * This package is based on the work done by Keiron Liddle, Aftex Software + * <keiron@aftexsw.com> to whom the Ant project is very grateful for his + * great code. + */ + +package org.apache.tools.bzip2; + +import java.io.IOException; +import java.io.OutputStream; + +/** + * An output stream that compresses into the BZip2 format (without the file + * header chars) into another stream. + * + * <p> + * The compression requires large amounts of memory. Thus you should call the + * {@link #close() close()} method as soon as possible, to force + * <tt>CBZip2OutputStream</tt> to release the allocated memory. + * </p> + * + * <p> You can shrink the amount of allocated memory and maybe raise + * the compression speed by choosing a lower blocksize, which in turn + * may cause a lower compression ratio. You can avoid unnecessary + * memory allocation by avoiding using a blocksize which is bigger + * than the size of the input. </p> + * + * <p> You can compute the memory usage for compressing by the + * following formula: </p> + * + * <pre> + * <code>400k + (9 * blocksize)</code>. + * </pre> + * + * <p> To get the memory required for decompression by {@link + * CBZip2InputStream CBZip2InputStream} use </p> + * + * <pre> + * <code>65k + (5 * blocksize)</code>. + * </pre> + * + * <table width="100%" border="1"> + * <colgroup> <col width="33%" /> <col width="33%" /> <col width="33%" /> + * </colgroup> + * <tr> + * <th colspan="3">Memory usage by blocksize</th> + * </tr> + * <tr> + * <th align="right">Blocksize</th> <th align="right">Compression<br> + * memory usage</th> <th align="right">Decompression<br> + * memory usage</th> + * </tr> + * <tr> + * <td align="right">100k</td> + * <td align="right">1300k</td> + * <td align="right">565k</td> + * </tr> + * <tr> + * <td align="right">200k</td> + * <td align="right">2200k</td> + * <td align="right">1065k</td> + * </tr> + * <tr> + * <td align="right">300k</td> + * <td align="right">3100k</td> + * <td align="right">1565k</td> + * </tr> + * <tr> + * <td align="right">400k</td> + * <td align="right">4000k</td> + * <td align="right">2065k</td> + * </tr> + * <tr> + * <td align="right">500k</td> + * <td align="right">4900k</td> + * <td align="right">2565k</td> + * </tr> + * <tr> + * <td align="right">600k</td> + * <td align="right">5800k</td> + * <td align="right">3065k</td> + * </tr> + * <tr> + * <td align="right">700k</td> + * <td align="right">6700k</td> + * <td align="right">3565k</td> + * </tr> + * <tr> + * <td align="right">800k</td> + * <td align="right">7600k</td> + * <td align="right">4065k</td> + * </tr> + * <tr> + * <td align="right">900k</td> + * <td align="right">8500k</td> + * <td align="right">4565k</td> + * </tr> + * </table> + * + * <p> + * For decompression <tt>CBZip2InputStream</tt> allocates less memory if the + * bzipped input is smaller than one block. + * </p> + * + * <p> + * Instances of this class are not threadsafe. + * </p> + * + * <p> + * TODO: Update to BZip2 1.0.1 + * </p> + * + */ +public class CBZip2OutputStream extends OutputStream + implements BZip2Constants { + + /** + * The minimum supported blocksize <tt> == 1</tt>. + */ + public static final int MIN_BLOCKSIZE = 1; + + /** + * The maximum supported blocksize <tt> == 9</tt>. + */ + public static final int MAX_BLOCKSIZE = 9; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ + protected static final int SETMASK = (1 << 21); + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ + protected static final int CLEARMASK = (~SETMASK); + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ + protected static final int GREATER_ICOST = 15; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ + protected static final int LESSER_ICOST = 0; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ + protected static final int SMALL_THRESH = 20; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ + protected static final int DEPTH_THRESH = 10; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + */ + protected static final int WORK_FACTOR = 30; + + /** + * This constant is accessible by subclasses for historical + * purposes. If you don't know what it means then you don't need + * it. + * <p> If you are ever unlucky/improbable enough to get a stack + * overflow whilst sorting, increase the following constant and + * try again. In practice I have never seen the stack go above 27 + * elems, so the following limit seems very generous. </p> + */ + protected static final int QSORT_STACK_SIZE = 1000; + + /** + * Knuth's increments seem to work better than Incerpi-Sedgewick here. + * Possibly because the number of elems to sort is usually small, typically + * <= 20. + */ + private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, 797161, + 2391484 }; + + /** + * This method is accessible by subclasses for historical + * purposes. If you don't know what it does then you don't need + * it. + */ + protected static void hbMakeCodeLengths(char[] len, int[] freq, + int alphaSize, int maxLen) { + /* + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. + */ + final int[] heap = new int[MAX_ALPHA_SIZE * 2]; + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + for (boolean tooLong = true; tooLong;) { + tooLong = false; + + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + + // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap; + + while (nHeap > 1) { + int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = (((weight_n1 & 0xffffff00) + + (weight_n2 & 0xffffff00)) + | + (1 + (((weight_n1 & 0x000000ff) + > (weight_n2 & 0x000000ff)) + ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff)) + )); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + + } + + // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes; + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (char) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + private static void hbMakeCodeLengths(final byte[] len, final int[] freq, + final Data dat, final int alphaSize, + final int maxLen) { + /* + * Nodes and heap entries run from 1. Entry 0 for both the heap and + * nodes is a sentinel. + */ + final int[] heap = dat.heap; + final int[] weight = dat.weight; + final int[] parent = dat.parent; + + for (int i = alphaSize; --i >= 0;) { + weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + } + + for (boolean tooLong = true; tooLong;) { + tooLong = false; + + int nNodes = alphaSize; + int nHeap = 0; + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (int i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + + int zz = nHeap; + int tmp = heap[zz]; + while (weight[tmp] < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + } + + while (nHeap > 1) { + int n1 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + int yy = 0; + int zz = 1; + int tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + + int n2 = heap[1]; + heap[1] = heap[nHeap]; + nHeap--; + + yy = 0; + zz = 1; + tmp = heap[1]; + + while (true) { + yy = zz << 1; + + if (yy > nHeap) { + break; + } + + if ((yy < nHeap) + && (weight[heap[yy + 1]] < weight[heap[yy]])) { + yy++; + } + + if (weight[tmp] < weight[heap[yy]]) { + break; + } + + heap[zz] = heap[yy]; + zz = yy; + } + + heap[zz] = tmp; + nNodes++; + parent[n1] = parent[n2] = nNodes; + + final int weight_n1 = weight[n1]; + final int weight_n2 = weight[n2]; + weight[nNodes] = ((weight_n1 & 0xffffff00) + + (weight_n2 & 0xffffff00)) + | (1 + (((weight_n1 & 0x000000ff) + > (weight_n2 & 0x000000ff)) + ? (weight_n1 & 0x000000ff) + : (weight_n2 & 0x000000ff))); + + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + + tmp = 0; + zz = nHeap; + tmp = heap[zz]; + final int weight_tmp = weight[tmp]; + while (weight_tmp < weight[heap[zz >> 1]]) { + heap[zz] = heap[zz >> 1]; + zz >>= 1; + } + heap[zz] = tmp; + + } + + for (int i = 1; i <= alphaSize; i++) { + int j = 0; + int k = i; + + for (int parent_k; (parent_k = parent[k]) >= 0;) { + k = parent_k; + j++; + } + + len[i - 1] = (byte) j; + if (j > maxLen) { + tooLong = true; + } + } + + if (tooLong) { + for (int i = 1; i < alphaSize; i++) { + int j = weight[i] >> 8; + j = 1 + (j >> 1); + weight[i] = j << 8; + } + } + } + } + + /** + * Index of the last char in the block, so the block size == last + 1. + */ + private int last; + + /** + * Always: in the range 0 .. 9. The current block size is 100000 * this + * number. + */ + private final int blockSize100k; + + private int bsBuff; + private int bsLive; + private final CRC crc = new CRC(); + + private int nInUse; + + private int nMTF; + + private int currentChar = -1; + private int runLength = 0; + + private int blockCRC; + private int combinedCRC; + private final int allowableBlockSize; + + /** + * All memory intensive stuff. + */ + private Data data; + private BlockSort blockSorter; + + private OutputStream out; + + /** + * Chooses a blocksize based on the given length of the data to compress. + * + * @return The blocksize, between {@link #MIN_BLOCKSIZE} and + * {@link #MAX_BLOCKSIZE} both inclusive. For a negative + * <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt> + * always. + * + * @param inputLength + * The length of the data which will be compressed by + * <tt>CBZip2OutputStream</tt>. + */ + public static int chooseBlockSize(long inputLength) { + return (inputLength > 0) ? (int) Math + .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; + } + + /** + * Constructs a new <tt>CBZip2OutputStream</tt> with a blocksize of 900k. + * + * <p> + * <b>Attention: </b>The caller is responsible to write the two BZip2 magic + * bytes <tt>"BZ"</tt> to the specified stream prior to calling this + * constructor. + * </p> + * + * @param out * + * the destination stream. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws NullPointerException + * if <code>out == null</code>. + */ + public CBZip2OutputStream(final OutputStream out) throws IOException { + this(out, MAX_BLOCKSIZE); + } + + /** + * Constructs a new <tt>CBZip2OutputStream</tt> with specified blocksize. + * + * <p> + * <b>Attention: </b>The caller is responsible to write the two BZip2 magic + * bytes <tt>"BZ"</tt> to the specified stream prior to calling this + * constructor. + * </p> + * + * + * @param out + * the destination stream. + * @param blockSize + * the blockSize as 100k units. + * + * @throws IOException + * if an I/O error occurs in the specified stream. + * @throws IllegalArgumentException + * if <code>(blockSize < 1) || (blockSize > 9)</code>. + * @throws NullPointerException + * if <code>out == null</code>. + * + * @see #MIN_BLOCKSIZE + * @see #MAX_BLOCKSIZE + */ + public CBZip2OutputStream(final OutputStream out, final int blockSize) + throws IOException { + super(); + + if (blockSize < 1) { + throw new IllegalArgumentException("blockSize(" + blockSize + + ") < 1"); + } + if (blockSize > 9) { + throw new IllegalArgumentException("blockSize(" + blockSize + + ") > 9"); + } + + this.blockSize100k = blockSize; + this.out = out; + + /* 20 is just a paranoia constant */ + this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20; + init(); + } + + /** {@inheritDoc} */ + @Override + public void write(final int b) throws IOException { + if (this.out != null) { + write0(b); + } else { + throw new IOException("closed"); + } + } + + /** + * Writes the current byte to the buffer, run-length encoding it + * if it has been repeated at least four times (the first step + * RLEs sequences of four identical bytes). + * + * <p>Flushes the current block before writing data if it is + * full.</p> + * + * <p>"write to the buffer" means adding to data.buffer starting + * two steps "after" this.last - initially starting at index 1 + * (not 0) - and updating this.last to point to the last index + * written minus 1.</p> + */ + private void writeRun() throws IOException { + final int lastShadow = this.last; + + if (lastShadow < this.allowableBlockSize) { + final int currentCharShadow = this.currentChar; + final Data dataShadow = this.data; + dataShadow.inUse[currentCharShadow] = true; + final byte ch = (byte) currentCharShadow; + + int runLengthShadow = this.runLength; + this.crc.updateCRC(currentCharShadow, runLengthShadow); + + switch (runLengthShadow) { + case 1: + dataShadow.block[lastShadow + 2] = ch; + this.last = lastShadow + 1; + break; + + case 2: + dataShadow.block[lastShadow + 2] = ch; + dataShadow.block[lastShadow + 3] = ch; + this.last = lastShadow + 2; + break; + + case 3: { + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + this.last = lastShadow + 3; + } + break; + + default: { + runLengthShadow -= 4; + dataShadow.inUse[runLengthShadow] = true; + final byte[] block = dataShadow.block; + block[lastShadow + 2] = ch; + block[lastShadow + 3] = ch; + block[lastShadow + 4] = ch; + block[lastShadow + 5] = ch; + block[lastShadow + 6] = (byte) runLengthShadow; + this.last = lastShadow + 5; + } + break; + + } + } else { + endBlock(); + initBlock(); + writeRun(); + } + } + + /** + * Overridden to close the stream. + */ + @Override + protected void finalize() throws Throwable { + finish(); + super.finalize(); + } + + + public void finish() throws IOException { + if (out != null) { + try { + if (this.runLength > 0) { + writeRun(); + } + this.currentChar = -1; + endBlock(); + endCompression(); + } finally { + this.out = null; + this.data = null; + this.blockSorter = null; + } + } + } + + @Override + public void close() throws IOException { + if (out != null) { + OutputStream outShadow = this.out; + finish(); + outShadow.close(); + } + } + + @Override + public void flush() throws IOException { + OutputStream outShadow = this.out; + if (outShadow != null) { + outShadow.flush(); + } + } + + private void init() throws IOException { + // write magic: done by caller who created this stream + // this.out.write('B'); + // this.out.write('Z'); + + this.data = new Data(this.blockSize100k); + this.blockSorter = new BlockSort(this.data); + + /* + * Write `magic' bytes h indicating file-format == huffmanised, followed + * by a digit indicating blockSize100k. + */ + bsPutUByte('h'); + bsPutUByte('0' + this.blockSize100k); + + this.combinedCRC = 0; + initBlock(); + } + + private void initBlock() { + // blockNo++; + this.crc.initialiseCRC(); + this.last = -1; + // ch = 0; + + boolean[] inUse = this.data.inUse; + for (int i = 256; --i >= 0;) { + inUse[i] = false; + } + } + + private void endBlock() throws IOException { + this.blockCRC = this.crc.getFinalCRC(); + this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); + this.combinedCRC ^= this.blockCRC; + + // empty block at end of file + if (this.last == -1) { + return; + } + + /* sort the block and establish posn of original string */ + blockSort(); + + /* + * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 + * :-). A 32 bit value does not really give a strong enough guarantee + * that the value will not appear by chance in the compressed + * datastream. Worst-case probability of this event, for a 900k block, + * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 + * bits. For a compressed file of size 100Gb -- about 100000 blocks -- + * only a 48-bit marker will do. NB: normal compression/ decompression + * donot rely on these statistical properties. They are only important + * when trying to recover blocks from damaged files. + */ + bsPutUByte(0x31); + bsPutUByte(0x41); + bsPutUByte(0x59); + bsPutUByte(0x26); + bsPutUByte(0x53); + bsPutUByte(0x59); + + /* Now the block's CRC, so it is in a known place. */ + bsPutInt(this.blockCRC); + + /* Now a single bit indicating no randomisation. */ + bsW(1, 0); + + /* Finally, block's contents proper. */ + moveToFrontCodeAndSend(); + } + + private void endCompression() throws IOException { + /* + * Now another magic 48-bit number, 0x177245385090, to indicate the end + * of the last block. (sqrt(pi), if you want to know. I did want to use + * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me + * to feel statistically comfortable. Call me paranoid.) + */ + bsPutUByte(0x17); + bsPutUByte(0x72); + bsPutUByte(0x45); + bsPutUByte(0x38); + bsPutUByte(0x50); + bsPutUByte(0x90); + + bsPutInt(this.combinedCRC); + bsFinishedWithStream(); + } + + /** + * Returns the blocksize parameter specified at construction time. + */ + public final int getBlockSize() { + return this.blockSize100k; + } + + @Override + public void write(final byte[] buf, int offs, final int len) + throws IOException { + if (offs < 0) { + throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); + } + if (len < 0) { + throw new IndexOutOfBoundsException("len(" + len + ") < 0."); + } + if (offs + len > buf.length) { + throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + + len + ") > buf.length(" + + buf.length + ")."); + } + if (this.out == null) { + throw new IOException("stream closed"); + } + + for (int hi = offs + len; offs < hi;) { + write0(buf[offs++]); + } + } + + /** + * Keeps track of the last bytes written and implicitly performs + * run-length encoding as the first step of the bzip2 algorithm. + */ + private void write0(int b) throws IOException { + if (this.currentChar != -1) { + b &= 0xff; + if (this.currentChar == b) { + if (++this.runLength > 254) { + writeRun(); + this.currentChar = -1; + this.runLength = 0; + } + // else nothing to do + } else { + writeRun(); + this.runLength = 1; + this.currentChar = b; + } + } else { + this.currentChar = b & 0xff; + this.runLength++; + } + } + + private static void hbAssignCodes(final int[] code, final byte[] length, + final int minLen, final int maxLen, + final int alphaSize) { + int vec = 0; + for (int n = minLen; n <= maxLen; n++) { + for (int i = 0; i < alphaSize; i++) { + if ((length[i] & 0xff) == n) { + code[i] = vec; + vec++; + } + } + vec <<= 1; + } + } + + private void bsFinishedWithStream() throws IOException { + while (this.bsLive > 0) { + int ch = this.bsBuff >> 24; + this.out.write(ch); // write 8-bit + this.bsBuff <<= 8; + this.bsLive -= 8; + } + } + + private void bsW(final int n, final int v) throws IOException { + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + + this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); + this.bsLive = bsLiveShadow + n; + } + + private void bsPutUByte(final int c) throws IOException { + bsW(8, c); + } + + private void bsPutInt(final int u) throws IOException { + bsW(8, (u >> 24) & 0xff); + bsW(8, (u >> 16) & 0xff); + bsW(8, (u >> 8) & 0xff); + bsW(8, u & 0xff); + } + + private void sendMTFValues() throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final int alphaSize = this.nInUse + 2; + + for (int t = N_GROUPS; --t >= 0;) { + byte[] len_t = len[t]; + for (int v = alphaSize; --v >= 0;) { + len_t[v] = GREATER_ICOST; + } + } + + /* Decide how many coding tables to use */ + // assert (this.nMTF > 0) : this.nMTF; + final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 + : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; + + /* Generate an initial set of coding tables */ + sendMTFValues0(nGroups, alphaSize); + + /* + * Iterate up to N_ITERS times to improve the tables. + */ + final int nSelectors = sendMTFValues1(nGroups, alphaSize); + + /* Compute MTF values for the selectors. */ + sendMTFValues2(nGroups, nSelectors); + + /* Assign actual codes for the tables. */ + sendMTFValues3(nGroups, alphaSize); + + /* Transmit the mapping table. */ + sendMTFValues4(); + + /* Now the selectors. */ + sendMTFValues5(nGroups, nSelectors); + + /* Now the coding tables. */ + sendMTFValues6(nGroups, alphaSize); + + /* And finally, the block data proper */ + sendMTFValues7(); + } + + private void sendMTFValues0(final int nGroups, final int alphaSize) { + final byte[][] len = this.data.sendMTFValues_len; + final int[] mtfFreq = this.data.mtfFreq; + + int remF = this.nMTF; + int gs = 0; + + for (int nPart = nGroups; nPart > 0; nPart--) { + final int tFreq = remF / nPart; + int ge = gs - 1; + int aFreq = 0; + + for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { + aFreq += mtfFreq[++ge]; + } + + if ((ge > gs) && (nPart != nGroups) && (nPart != 1) + && (((nGroups - nPart) & 1) != 0)) { + aFreq -= mtfFreq[ge--]; + } + + final byte[] len_np = len[nPart - 1]; + for (int v = alphaSize; --v >= 0;) { + if ((v >= gs) && (v <= ge)) { + len_np[v] = LESSER_ICOST; + } else { + len_np[v] = GREATER_ICOST; + } + } + + gs = ge + 1; + remF -= aFreq; + } + } + + private int sendMTFValues1(final int nGroups, final int alphaSize) { + final Data dataShadow = this.data; + final int[][] rfreq = dataShadow.sendMTFValues_rfreq; + final int[] fave = dataShadow.sendMTFValues_fave; + final short[] cost = dataShadow.sendMTFValues_cost; + final char[] sfmap = dataShadow.sfmap; + final byte[] selector = dataShadow.selector; + final byte[][] len = dataShadow.sendMTFValues_len; + final byte[] len_0 = len[0]; + final byte[] len_1 = len[1]; + final byte[] len_2 = len[2]; + final byte[] len_3 = len[3]; + final byte[] len_4 = len[4]; + final byte[] len_5 = len[5]; + final int nMTFShadow = this.nMTF; + + int nSelectors = 0; + + for (int iter = 0; iter < N_ITERS; iter++) { + for (int t = nGroups; --t >= 0;) { + fave[t] = 0; + int[] rfreqt = rfreq[t]; + for (int i = alphaSize; --i >= 0;) { + rfreqt[i] = 0; + } + } + + nSelectors = 0; + + for (int gs = 0; gs < this.nMTF;) { + /* Set group start & end marks. */ + + /* + * Calculate the cost of this group as coded by each of the + * coding tables. + */ + + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + + if (nGroups == N_GROUPS) { + // unrolled version of the else-block + + short cost0 = 0; + short cost1 = 0; + short cost2 = 0; + short cost3 = 0; + short cost4 = 0; + short cost5 = 0; + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + cost0 += len_0[icv] & 0xff; + cost1 += len_1[icv] & 0xff; + cost2 += len_2[icv] & 0xff; + cost3 += len_3[icv] & 0xff; + cost4 += len_4[icv] & 0xff; + cost5 += len_5[icv] & 0xff; + } + + cost[0] = cost0; + cost[1] = cost1; + cost[2] = cost2; + cost[3] = cost3; + cost[4] = cost4; + cost[5] = cost5; + + } else { + for (int t = nGroups; --t >= 0;) { + cost[t] = 0; + } + + for (int i = gs; i <= ge; i++) { + final int icv = sfmap[i]; + for (int t = nGroups; --t >= 0;) { + cost[t] += len[t][icv] & 0xff; + } + } + } + + /* + * Find the coding table which is best for this group, and + * record its identity in the selector table. + */ + int bt = -1; + for (int t = nGroups, bc = 999999999; --t >= 0;) { + final int cost_t = cost[t]; + if (cost_t < bc) { + bc = cost_t; + bt = t; + } + } + + fave[bt]++; + selector[nSelectors] = (byte) bt; + nSelectors++; + + /* + * Increment the symbol frequencies for the selected table. + */ + final int[] rfreq_bt = rfreq[bt]; + for (int i = gs; i <= ge; i++) { + rfreq_bt[sfmap[i]]++; + } + + gs = ge + 1; + } + + /* + * Recompute the tables based on the accumulated frequencies. + */ + for (int t = 0; t < nGroups; t++) { + hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); + } + } + + return nSelectors; + } + + private void sendMTFValues2(final int nGroups, final int nSelectors) { + // assert (nGroups < 8) : nGroups; + + final Data dataShadow = this.data; + byte[] pos = dataShadow.sendMTFValues2_pos; + + for (int i = nGroups; --i >= 0;) { + pos[i] = (byte) i; + } + + for (int i = 0; i < nSelectors; i++) { + final byte ll_i = dataShadow.selector[i]; + byte tmp = pos[0]; + int j = 0; + + while (ll_i != tmp) { + j++; + byte tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + } + + pos[0] = tmp; + dataShadow.selectorMtf[i] = (byte) j; + } + } + + private void sendMTFValues3(final int nGroups, final int alphaSize) { + int[][] code = this.data.sendMTFValues_code; + byte[][] len = this.data.sendMTFValues_len; + + for (int t = 0; t < nGroups; t++) { + int minLen = 32; + int maxLen = 0; + final byte[] len_t = len[t]; + for (int i = alphaSize; --i >= 0;) { + final int l = len_t[i] & 0xff; + if (l > maxLen) { + maxLen = l; + } + if (l < minLen) { + minLen = l; + } + } + + // assert (maxLen <= 20) : maxLen; + // assert (minLen >= 1) : minLen; + + hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); + } + } + + private void sendMTFValues4() throws IOException { + final boolean[] inUse = this.data.inUse; + final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; + + for (int i = 16; --i >= 0;) { + inUse16[i] = false; + final int i16 = i * 16; + for (int j = 16; --j >= 0;) { + if (inUse[i16 + j]) { + inUse16[i] = true; + } + } + } + + for (int i = 0; i < 16; i++) { + bsW(1, inUse16[i] ? 1 : 0); + } + + final OutputStream outShadow = this.out; + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < 16; i++) { + if (inUse16[i]) { + final int i16 = i * 16; + for (int j = 0; j < 16; j++) { + // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + if (inUse[i16 + j]) { + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + } + bsLiveShadow++; + } + } + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues5(final int nGroups, final int nSelectors) + throws IOException { + bsW(3, nGroups); + bsW(15, nSelectors); + + final OutputStream outShadow = this.out; + final byte[] selectorMtf = this.data.selectorMtf; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int i = 0; i < nSelectors; i++) { + for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { + // inlined: bsW(1, 1); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues6(final int nGroups, final int alphaSize) + throws IOException { + final byte[][] len = this.data.sendMTFValues_len; + final OutputStream outShadow = this.out; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int t = 0; t < nGroups; t++) { + byte[] len_t = len[t]; + int curr = len_t[0] & 0xff; + + // inlined: bsW(5, curr); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= curr << (32 - bsLiveShadow - 5); + bsLiveShadow += 5; + + for (int i = 0; i < alphaSize; i++) { + int lti = len_t[i] & 0xff; + while (curr < lti) { + // inlined: bsW(2, 2); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + + curr++; /* 10 */ + } + + while (curr > lti) { + // inlined: bsW(2, 3); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); + bsLiveShadow += 2; + + curr--; /* 11 */ + } + + // inlined: bsW(1, 0); + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); // write 8-bit + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); + bsLiveShadow++; + } + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void sendMTFValues7() throws IOException { + final Data dataShadow = this.data; + final byte[][] len = dataShadow.sendMTFValues_len; + final int[][] code = dataShadow.sendMTFValues_code; + final OutputStream outShadow = this.out; + final byte[] selector = dataShadow.selector; + final char[] sfmap = dataShadow.sfmap; + final int nMTFShadow = this.nMTF; + + int selCtr = 0; + + int bsLiveShadow = this.bsLive; + int bsBuffShadow = this.bsBuff; + + for (int gs = 0; gs < nMTFShadow;) { + final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); + final int selector_selCtr = selector[selCtr] & 0xff; + final int[] code_selCtr = code[selector_selCtr]; + final byte[] len_selCtr = len[selector_selCtr]; + + while (gs <= ge) { + final int sfmap_i = sfmap[gs]; + + // + // inlined: bsW(len_selCtr[sfmap_i] & 0xff, + // code_selCtr[sfmap_i]); + // + while (bsLiveShadow >= 8) { + outShadow.write(bsBuffShadow >> 24); + bsBuffShadow <<= 8; + bsLiveShadow -= 8; + } + final int n = len_selCtr[sfmap_i] & 0xFF; + bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); + bsLiveShadow += n; + + gs++; + } + + gs = ge + 1; + selCtr++; + } + + this.bsBuff = bsBuffShadow; + this.bsLive = bsLiveShadow; + } + + private void moveToFrontCodeAndSend() throws IOException { + bsW(24, this.data.origPtr); + generateMTFValues(); + sendMTFValues(); + } + + private void blockSort() { + blockSorter.blockSort(data, last); + } + + /* + * Performs Move-To-Front on the Burrows-Wheeler transformed + * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB + * run-length-encoded form. + * + * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> + */ + private void generateMTFValues() { + final int lastShadow = this.last; + final Data dataShadow = this.data; + final boolean[] inUse = dataShadow.inUse; + final byte[] block = dataShadow.block; + final int[] fmap = dataShadow.fmap; + final char[] sfmap = dataShadow.sfmap; + final int[] mtfFreq = dataShadow.mtfFreq; + final byte[] unseqToSeq = dataShadow.unseqToSeq; + final byte[] yy = dataShadow.generateMTFValues_yy; + + // make maps + int nInUseShadow = 0; + for (int i = 0; i < 256; i++) { + if (inUse[i]) { + unseqToSeq[i] = (byte) nInUseShadow; + nInUseShadow++; + } + } + this.nInUse = nInUseShadow; + + final int eob = nInUseShadow + 1; + + for (int i = eob; i >= 0; i--) { + mtfFreq[i] = 0; + } + + for (int i = nInUseShadow; --i >= 0;) { + yy[i] = (byte) i; + } + + int wr = 0; + int zPend = 0; + + for (int i = 0; i <= lastShadow; i++) { + final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; + byte tmp = yy[0]; + int j = 0; + + while (ll_i != tmp) { + j++; + byte tmp2 = tmp; + tmp = yy[j]; + yy[j] = tmp2; + } + yy[0] = tmp; + + if (j == 0) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (true) { + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; + wr++; + mtfFreq[RUNA]++; + } else { + sfmap[wr] = RUNB; + wr++; + mtfFreq[RUNB]++; + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { + break; + } + } + zPend = 0; + } + sfmap[wr] = (char) (j + 1); + wr++; + mtfFreq[j + 1]++; + } + } + + if (zPend > 0) { + zPend--; + while (true) { + if ((zPend & 1) == 0) { + sfmap[wr] = RUNA; + wr++; + mtfFreq[RUNA]++; + } else { + sfmap[wr] = RUNB; + wr++; + mtfFreq[RUNB]++; + } + + if (zPend >= 2) { + zPend = (zPend - 2) >> 1; + } else { + break; + } + } + } + + sfmap[wr] = (char) eob; + mtfFreq[eob]++; + this.nMTF = wr + 1; + } + + static final class Data extends Object { + + // with blockSize 900k + /* maps unsigned byte => "does it occur in block" */ + final boolean[] inUse = new boolean[256]; // 256 byte + final byte[] unseqToSeq = new byte[256]; // 256 byte + final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte + final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte + final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte + + final byte[] generateMTFValues_yy = new byte[256]; // 256 byte + final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 + // byte + final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte + final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte + final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 + // byte + final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte + final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte + + final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte + final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte + + // ------------ + // 333408 byte + + /* holds the RLEd block of original data starting at index 1. + * After sorting the last byte added to the buffer is at index + * 0. */ + final byte[] block; // 900021 byte + /* maps index in Burrows-Wheeler transformed block => index of + * byte in original block */ + final int[] fmap; // 3600000 byte + final char[] sfmap; // 3600000 byte + // ------------ + // 8433529 byte + // ============ + + /** + * Index of original line in Burrows-Wheeler table. + * + * <p>This is the index in fmap that points to the last byte + * of the original data.</p> + */ + int origPtr; + + Data(int blockSize100k) { + super(); + + final int n = blockSize100k * BZip2Constants.baseBlockSize; + this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; + this.fmap = new int[n]; + this.sfmap = new char[2 * n]; + } + + } + +} |