From 9f858acdaca14174201766aab87c48fa09ce76f9 Mon Sep 17 00:00:00 2001 From: Not Zed Date: Fri, 26 Apr 2019 10:52:13 +0930 Subject: [PATCH] Made the copy/add split point configurable. Adjusted format, and froze it. Added some test casts. --- README | 27 ++--- .../au/notzed/dez/demo/DEZPrinter.java | 29 ++--- .../au/notzed/dez/ByteMatcherHash.java | 3 +- .../classes/au/notzed/dez/DEZDecoder.java | 23 ++-- .../classes/au/notzed/dez/DEZEncoder.java | 42 +++++--- .../classes/au/notzed/dez/DEZFormat.java | 46 ++++++-- .../au/notzed/dez/ByteMatcherHashTest.java | 1 - .../tests/au/notzed/dez/DEZEncoderTest.java | 56 +++++++--- src/notzed.dez/tests/au/notzed/dez/DEZIT.java | 100 +++++++++++++++--- 9 files changed, 234 insertions(+), 93 deletions(-) diff --git a/README b/README index 51c3ae3..d9aa51a 100644 --- a/README +++ b/README @@ -9,9 +9,14 @@ paper ``Data Compression Using Long Common Strings''. Here however the hashes may be performed on overlapping blocks, possibly at every location. -It includes an encoder and decoder for a proprietary (and still not -quite fixed) delta format 'DEZ1' but standardised formats such as -VCDIFF are possible by implementing a simple interface. +It compares very favourably to badiff (1.2) and jvcdiff (2.0) in most +metrics apart from encoding time (depending on the tunable +parameters). In particular memory use and patch size is considerably +better than badiff. + +It includes an encoder and decoder for a proprietary patch format +'DEZ1' but standardised formats such as VCDIFF are possible by +implementing a simple interface. The 'matcher' is also replaceable. @@ -65,18 +70,14 @@ $ java --module-path bin/modules -m notzed.dez.demo/au.notzed.dez.demo.dez \ STATUS ------ -As of 1.3 this could be considered pre-beta. The dez file format is -now likely fixed but still may change. At the very least it requires -a checksum for production use. - -There are now performance tunables to handle some nasty worst-case -behaviour due to implementing an exhaustive search. These trade off -delta size against cpu and memory use in the encoder. The decoder is -unchanged. +As of 1.3 this is solidly in beta. It should be production ready but +hasn't been heavily tested. The patch format is now fixed, it includes +a checksum. -It needs more testing. +It could do with some more tests, for example of the ByteMatcherHash +tunables. -The makefile is still work in progress. +The patch format and algorithm is described in DEZFormat.java. LINKS ----- diff --git a/src/notzed.dez.demo/classes/au/notzed/dez/demo/DEZPrinter.java b/src/notzed.dez.demo/classes/au/notzed/dez/demo/DEZPrinter.java index 4f3d61e..09d0cd6 100644 --- a/src/notzed.dez.demo/classes/au/notzed/dez/demo/DEZPrinter.java +++ b/src/notzed.dez.demo/classes/au/notzed/dez/demo/DEZPrinter.java @@ -112,16 +112,17 @@ public class DEZPrinter implements DEZFormat { int ti = 0; int flags; int smallest; + int op_single_split; pi = 0; // Read header out.printf("magic: %c%c%c%c\n", (char) patch[0], (char) patch[1], (char) patch[2], (char) patch[3]); pi += 4; - flags = decodeInt(); - out.printf("flags: %08x\n", flags); smallest = decodeInt(); + op_single_split = decodeInt(); out.printf("smallest: %d\n", smallest); + out.printf("split: %d\n", op_single_split); int sourceSize = decodeInt(); int targetSize = decodeInt(); @@ -144,7 +145,7 @@ public class DEZPrinter implements DEZFormat { pi += add; addr = decodeAddr(); - out.printf("%08x: ADD %d COPY %d @ $%08x\n", + out.printf("%08x: ADD #%d COPY #%d @ $%08x\n", ti, add, copy, addr); ti += add + copy; @@ -157,7 +158,7 @@ public class DEZPrinter implements DEZFormat { int addr0 = decodeAddr(); int addr1 = decodeAddr(); - out.printf("%08x: COPY %d @ $%08x COPY %d @ $%08x\n", + out.printf("%08x: COPY #%d @ $%08x COPY #%d @ $%08x\n", ti, copy0, addr0, copy1, addr1); ti += copy0 + copy1; @@ -174,43 +175,43 @@ public class DEZPrinter implements DEZFormat { // 127 ext command int n = op & 0x7f; - if (n < OP_SINGLE_SPLIT) { + if (n < op_single_split) { // COPY + addr int copy = n + smallest; int addr = decodeAddr(); - out.printf("%08x: COPY %d @ $%08x\n", ti, copy, addr); + out.printf("%08x: COPY #%d @ $%08x\n", ti, copy, addr); ti += copy; this.copy.add(copy); - } else if (n < 124) { + } else if (n < OP_SINGLE_LIMIT) { // ADD + data - int add = n - OP_SINGLE_SPLIT + 1; + int add = n - op_single_split + 1; - out.printf("%08x: ADD %d\n", ti, add); + out.printf("%08x: ADD #%d\n", ti, add); ti += add; pi += add; this.add.add(add); - } else if (n == 124) { + } else if (n == OP_COPY) { // COPY + length + addr - int copy = decodeInt() + smallest + OP_SINGLE_SPLIT; + int copy = decodeInt() + smallest + op_single_split; int addr = decodeAddr(); out.printf("%08x: COPY %d @ $%08x\n", ti, copy, addr); ti += copy; this.copy.add(copy); - } else if (n == 125) { + } else if (n == OP_ADD) { // ADD + length + data - int add = decodeInt() + (124 - OP_SINGLE_SPLIT) + 1; + int add = decodeInt() + (OP_SINGLE_LIMIT - op_single_split) + 1; out.printf("%08x: ADD %d\n", ti, add); ti += add; pi += add; this.add.add(add); - } else if (n == 126) { + } else if (n == OP_RUN) { // RUN + length + byte int run = decodeInt() + 3; byte r = patch[pi++]; diff --git a/src/notzed.dez/classes/au/notzed/dez/ByteMatcherHash.java b/src/notzed.dez/classes/au/notzed/dez/ByteMatcherHash.java index 1cfd2fd..402e8d7 100644 --- a/src/notzed.dez/classes/au/notzed/dez/ByteMatcherHash.java +++ b/src/notzed.dez/classes/au/notzed/dez/ByteMatcherHash.java @@ -84,6 +84,7 @@ public class ByteMatcherHash implements ByteMatcher { int size; b = Math.max(b, 3); + sstep = Math.max(sstep, 1); // This may need tuning. int logN = 31 - Integer.numberOfLeadingZeros((source.length + target.length) / sstep); @@ -184,7 +185,7 @@ public class ByteMatcherHash implements ByteMatcher { if (ri == 0) ri = 1; vs[ri] = value; - + //vs[roll++] = value; //if (roll == vs.length) // roll = 1; diff --git a/src/notzed.dez/classes/au/notzed/dez/DEZDecoder.java b/src/notzed.dez/classes/au/notzed/dez/DEZDecoder.java index 0af563a..3cbcd9e 100644 --- a/src/notzed.dez/classes/au/notzed/dez/DEZDecoder.java +++ b/src/notzed.dez/classes/au/notzed/dez/DEZDecoder.java @@ -162,8 +162,8 @@ public class DEZDecoder implements DEZFormat { pi += 4; - int flags = decodeInt(); int smallest = decodeInt(); + int op_single_split = decodeInt(); int sourceSize = decodeInt(); int targetSize = decodeInt(); byte[] target = new byte[targetSize]; @@ -187,7 +187,7 @@ public class DEZDecoder implements DEZFormat { addr = decodeAddr(); if (dump) - out.printf("%08x: ADD %d COPY %d @ $%08x\n", ti, add, copy, addr); + out.printf("%08x: ADD #%d COPY #%d @ $%08x\n", ti, add, copy, addr); ti = copy(addr, target, ti, copy); } else { // 01cccCCC @@ -200,10 +200,10 @@ public class DEZDecoder implements DEZFormat { ti = copy(addr1, target, ti, copy1); if (dump) - out.printf("%08x: COPY %d @ $%08x COPY %d @ $%08x\n", ti, copy0, addr0, copy1, addr1); + out.printf("%08x: COPY #%d @ $%08x COPY #%d @ $%08x\n", ti, copy0, addr0, copy1, addr1); } } else { - // 1NNNNNNN + // 1NNNNNNN with op_single_split=100 // 0 ... 99 copy smallest ... 99+smallest // 100 ... 123 add 1 ... 24 // 124 copy + length @@ -212,26 +212,26 @@ public class DEZDecoder implements DEZFormat { // 127 ext command int n = op & 0x7f; - if (n < OP_SINGLE_SPLIT) { + if (n < op_single_split) { // COPY + addr int copy = n + smallest; int addr = decodeAddr(); if (dump) - out.printf("%08x: COPY %d @ $%08x\n", ti, copy, addr); + out.printf("%08x: COPY #%d @ $%08x\n", ti, copy, addr); ti = copy(addr, target, ti, copy); } else if (n < OP_SINGLE_LIMIT) { // ADD + data - int add = n - OP_SINGLE_SPLIT + 1; + int add = n - op_single_split + 1; if (dump) - out.printf("%08x: ADD %d\n", ti, add); + out.printf("%08x: ADD #%d\n", ti, add); System.arraycopy(patch, pi, target, ti, add); ti += add; pi += add; } else if (n == OP_COPY) { // COPY + length + addr - int copy = decodeInt() + smallest + OP_SINGLE_SPLIT; + int copy = decodeInt() + smallest + op_single_split; int addr = decodeAddr(); if (dump) @@ -239,7 +239,7 @@ public class DEZDecoder implements DEZFormat { ti = copy(addr, target, ti, copy); } else if (n == OP_ADD) { // ADD + length + data - int add = decodeInt() + (124 - OP_SINGLE_SPLIT) + 1; + int add = decodeInt() + (OP_SINGLE_LIMIT - op_single_split) + 1; if (dump) out.printf("%08x: OP_ADD %d\n", ti, add); @@ -255,6 +255,9 @@ public class DEZDecoder implements DEZFormat { out.printf("%08x: OP_RUN $%02x %d\n", ti, r & 0xff, run); Arrays.fill(target, ti, ti + run, r); ti += run; + } else { + // No extension opcodes yet implemented. + throw new DEZFormatException("Unknown opcode"); } } } diff --git a/src/notzed.dez/classes/au/notzed/dez/DEZEncoder.java b/src/notzed.dez/classes/au/notzed/dez/DEZEncoder.java index d65777b..58cc8da 100644 --- a/src/notzed.dez/classes/au/notzed/dez/DEZEncoder.java +++ b/src/notzed.dez/classes/au/notzed/dez/DEZEncoder.java @@ -39,6 +39,7 @@ public class DEZEncoder implements ByteDeltaEncoder, DEZFormat { // Configurable parameters. final int smallest; + final int op_single_split; // Last operation information private int last = -1; private byte[] lastData; @@ -52,21 +53,34 @@ public class DEZEncoder implements ByteDeltaEncoder, DEZFormat { private int recentNext = 0; /** - * Creates a new encoder. + * Creates a new encoder with default parameters. *

- * The smallest copy size allowed is 4. + * smallest is 4 and split is @{value #OP_SINGLE_SPLIT}. */ public DEZEncoder() { this.smallest = 4; + this.op_single_split = OP_SINGLE_SPLIT; } /** * Creates a new encoder with a smallest copy size. * + * Out of range values are silently clamped. + * * @param smallest Sets the smallest copy allowed. It must be ≥ 4. + * @param split Sets the split for a single opcode. 0 ≤ split ≤ 124; */ - public DEZEncoder(int smallest) { - this.smallest = smallest; + public DEZEncoder(int smallest, int split) { + this.smallest = Math.max(4, smallest); + this.op_single_split = Math.min(OP_SINGLE_LIMIT, Math.max(0, split)); + } + + public int getSmallest() { + return smallest; + } + + public int getSplit() { + return op_single_split; } public void init(int sourceSize, int targetSize) { @@ -76,8 +90,8 @@ public class DEZEncoder implements ByteDeltaEncoder, DEZFormat { patch.reset(); patch.write(MAGIC); - encodeInt(flags); encodeInt(smallest); + encodeInt(op_single_split); encodeInt(sourceSize); encodeInt(targetSize); header = patch.size(); @@ -204,26 +218,26 @@ public class DEZEncoder implements ByteDeltaEncoder, DEZFormat { // 126 run + length (+3) + byte // 127 ext command if (last == OP_COPY) { - if (lastLen < OP_SINGLE_SPLIT + smallest) { + if (lastLen < op_single_split + smallest) { patch.write(0x80 | (lastLen - smallest)); if (dump) - System.out.printf("%02x %08x: COPY %d @ $%08x\n", 0x80 | (lastLen - smallest), here, lastLen, lastAddr); + System.out.printf("%02x %08x: COPY #%d @ $%08x\n", 0x80 | (lastLen - smallest), here, lastLen, lastAddr); } else { patch.write(0x80 | OP_COPY); - encodeInt(lastLen - smallest - OP_SINGLE_SPLIT); + encodeInt(lastLen - smallest - op_single_split); if (dump) System.out.printf("%02x %08x: COPY %d @ $%08x\n", 0x80 | OP_COPY, here, lastLen, lastAddr); } encodeAddr(lastAddr); here += lastLen; } else if (last == OP_ADD) { - if ((lastLen + OP_SINGLE_SPLIT - 1) < OP_SINGLE_LIMIT) { - patch.write(0x80 | (lastLen + OP_SINGLE_SPLIT - 1)); + if ((lastLen + op_single_split - 1) < OP_SINGLE_LIMIT) { + patch.write(0x80 | (lastLen + op_single_split - 1)); if (dump) - System.out.printf("%02x %08x: ADD %d\n", 0x80 | (lastLen + OP_SINGLE_SPLIT - 1), here, lastLen); + System.out.printf("%02x %08x: ADD #%d\n", 0x80 | (lastLen + op_single_split - 1), here, lastLen); } else { patch.write(0x80 | OP_ADD); - encodeInt(lastLen - (OP_SINGLE_LIMIT - OP_SINGLE_SPLIT) - 1); + encodeInt(lastLen - (OP_SINGLE_LIMIT - op_single_split) - 1); if (dump) System.out.printf("%02x %08x: ADD %d\n", 0x80 | OP_ADD, here, lastLen); } @@ -240,7 +254,7 @@ public class DEZEncoder implements ByteDeltaEncoder, DEZFormat { if (last == OP_ADD && lastLen < 1 + 8 && len < smallest + 8) { // 00AAACCC if (dump) - System.out.printf("%02x %08x: ADD %d COPY %d @ $%08x\n", + System.out.printf("%02x %08x: ADD #%d COPY #%d @ $%08x\n", ((lastLen - 1) << 3) | (len - smallest), here, lastLen, @@ -259,7 +273,7 @@ public class DEZEncoder implements ByteDeltaEncoder, DEZFormat { encodeAddr(addr); if (dump) - System.out.printf("%02x %08x: COPY %d @ $%08x COPY %d @ $%08x\n", + System.out.printf("%02x %08x: COPY #%d @ $%08x COPY #%d @ $%08x\n", ((lastLen - smallest) << 3) | (len - smallest) | 0x40, here, lastLen, lastAddr, len, addr); diff --git a/src/notzed.dez/classes/au/notzed/dez/DEZFormat.java b/src/notzed.dez/classes/au/notzed/dez/DEZFormat.java index 66f653d..399c0bb 100644 --- a/src/notzed.dez/classes/au/notzed/dez/DEZFormat.java +++ b/src/notzed.dez/classes/au/notzed/dez/DEZFormat.java @@ -37,8 +37,8 @@ package au.notzed.dez; *

  * HEADER:
  *  XXXX        magic      4 bytes ASCII, 'DEZ1'.
- *  Ciiiiiii+   flags      None are defined.
  *  Ciiiiiii+   smallest   Smallest copy offset.
+ *  Ciiiiiii+   split      Split point for single operation opcode.
  *  Ciiiiiii+   source     Source data size.
  *  Ciiiiiii+   target     Target data size.
  *
@@ -56,18 +56,19 @@ package au.notzed.dez;
  * 8th bit set. A range test defines the operation.
  * 
  *
- * 1nnnnnnn:
+ * 1nnnnnnn:  assuming a default split=100
  *
  * [000 099] aaaaaaaa+           COPY Copy (n+smallest) from the address.
+ *                                      smallest .. smallest+99
  * [100 123] dddddddd?           ADD  Append (n-99) from the patch.
+ *                                             1 .. 24
  * [    124] Ciiiiiii+ aaaaaaaa+ COPY Copy (i+100+smallest) from the address.
- * [    125] Ciiiiiii+ dddddddd? ADD  Append (i+12+1) from the patch.
+ * [    125] Ciiiiiii+ dddddddd? ADD  Append (i+(124-100)+1) from the patch.
  * [    126] Ciiiiiii+ dddddddd  RUN  Append (i+3) copies of the data byte.
  * [    127]                          Reserved.
  * 
- * The split point between immediate-length COPY and ADD can be altered via - * modifying {@link #OP_SINGLE_SPLIT}, but it will create an incompatible - * patch. + * The split point between an immediate-length COPY and ADD is stored in the + * stream. The default {@link #OP_SINGLE_SPLIT} is {@value #OP_SINGLE_SPLIT}. *

* Dual command perform two operations with a single opcode. The lengths * are encoded as immediate values and have a limited range of 3 bits each. @@ -101,7 +102,7 @@ package au.notzed.dez; * * The first byte determines the address encoding. *

- * 
+ *
  *  00nnnnnn           - use address 'n' exactly.
  *  01Snnnnn Ciiiiiii+ - use address 'n' plus (S=0) or minus (S=1) 'i'
  *  1iiiiiii Ciiiiiii+ - use address 'i' absolute.  Always at least 2 bytes.
@@ -141,7 +142,7 @@ public interface DEZFormat {
 	 */
 	public static final int OP_SINGLE = 0x80;
 	/**
-	 * The split-point between copy and add for immediate length single operations.
+	 * The default split-point between copy and add for immediate length single operations.
 	 * 

* A code below this value is a COPY, otherwise it is an ADD. */ @@ -195,7 +196,9 @@ public interface DEZFormat { public static final int ADDR_SIGN = 0x20; /** - * Creates a DEZ-1 format patch from byte sources. + * Creates a DEZ-1 format patch from data. + *

+ * This uses typical encoding parameters suitable for small textual blobs. * * @param source * @param target @@ -206,7 +209,30 @@ public interface DEZFormat { } /** - * Apply a DEZ-1 patch. + * Creates a DEZ-1 format patch from data. + *

+ * This form allows tuning parameters. + *

+ *
blockSize
Recommended 6, typically 4-8. A smaller value will allow + * matching shorter runs but will increase hash collision rates and increase + * encoding times. + *
stepSize
Recommended 1 for text, typically 1-8. This has the largest + * impact on encoding time as it directly relates to the number of run-length + * tests. Smaller values will always produce more compact patches. + *
+ * + * @param blockSize The size of blocks considered for matching. Will be clamped to be ≥ 3. + * @param stepSize The sampling step size. Will be clamped to be ≥ 1. + * @param source Source bytes. + * @param target Target bytes. + * @return patch the binary delta. + */ + public static byte[] encode(int blockSize, int stepSize, byte[] source, byte[] target) { + return ByteDeltaEncoder.toDiff(new ByteMatcherHash(blockSize, 4, source, stepSize, target), new DEZEncoder()); + } + + /** + * Apply a DEZ-1 patch to data. * * @param source * @param patch diff --git a/src/notzed.dez/tests/au/notzed/dez/ByteMatcherHashTest.java b/src/notzed.dez/tests/au/notzed/dez/ByteMatcherHashTest.java index 858b9c3..3c047bf 100644 --- a/src/notzed.dez/tests/au/notzed/dez/ByteMatcherHashTest.java +++ b/src/notzed.dez/tests/au/notzed/dez/ByteMatcherHashTest.java @@ -270,7 +270,6 @@ public class ByteMatcherHashTest { byte[] target = "plain bread melts melts the brains.".getBytes(); ByteMatcherHash bm = new ByteMatcherHash(4, 4, source, 1, target); - //Expected.make("testSequence", bm); Expected.check(testSequence, bm); } diff --git a/src/notzed.dez/tests/au/notzed/dez/DEZEncoderTest.java b/src/notzed.dez/tests/au/notzed/dez/DEZEncoderTest.java index 14ae02b..ead072c 100644 --- a/src/notzed.dez/tests/au/notzed/dez/DEZEncoderTest.java +++ b/src/notzed.dez/tests/au/notzed/dez/DEZEncoderTest.java @@ -61,8 +61,8 @@ public class DEZEncoderTest { assertArrayEquals(patch, new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, (byte) len, (byte) len, (byte) (128 + len - 4), 0, @@ -85,8 +85,8 @@ public class DEZEncoderTest { assertArrayEquals(patch, new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 0, (byte) len, (byte) (128 + 100 + len - 1), 1, 2, 3, 4, @@ -108,8 +108,8 @@ public class DEZEncoderTest { assertArrayEquals(patch, new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 0, (byte) len, (byte) (126 | 0x80), @@ -133,8 +133,8 @@ public class DEZEncoderTest { assertArrayEquals(patch, new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 0, (byte) ((len >> 7) | 0x80), (byte) (len & 127), @@ -159,8 +159,8 @@ public class DEZEncoderTest { assertArrayEquals(patch, new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 0, (byte) ((len >> 7) | 0x80), (byte) (len & 127), @@ -187,8 +187,8 @@ public class DEZEncoderTest { assertArrayEquals(patch, new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 0, (byte) len, (byte) (128 + 100 + 5 - 1), 1, 2, 3, 4, 5, @@ -212,8 +212,8 @@ public class DEZEncoderTest { assertArrayEquals(new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 16, (byte) len, (byte) ((10 - smallest) << 3 | (10 - smallest) | 0x40), @@ -227,7 +227,7 @@ public class DEZEncoderTest { System.out.println("copy copy short smallest!=4"); int len = 20; int smallest = 7; - DEZEncoder instance = new DEZEncoder(smallest); + DEZEncoder instance = new DEZEncoder(smallest, DEZEncoder.OP_SINGLE_SPLIT); instance.init(16, len); instance.copy(0, 10); @@ -237,8 +237,8 @@ public class DEZEncoderTest { assertArrayEquals(new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, (byte) smallest, + DEZEncoder.OP_SINGLE_SPLIT, 16, (byte) len, (byte) ((10 - smallest) << 3 | (10 - smallest) | 0x40), @@ -267,8 +267,8 @@ public class DEZEncoderTest { assertArrayEquals(new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 16, (byte) len, (byte) (((10 - smallest) << 3) | (10 - smallest) | 0x40), @@ -294,8 +294,8 @@ public class DEZEncoderTest { assertArrayEquals(new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 16, (byte) len, (byte) (((10 - smallest) << 3) | (10 - smallest) | 0x40), @@ -319,8 +319,8 @@ public class DEZEncoderTest { assertArrayEquals(new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 0, 0, 0x01, 0x23, 0x45, 0x67 @@ -331,7 +331,7 @@ public class DEZEncoderTest { public void testFormatSmallest() { System.out.println("smallest non-default"); - DEZEncoder instance = new DEZEncoder(6); + DEZEncoder instance = new DEZEncoder(6, DEZEncoder.OP_SINGLE_SPLIT); byte[] source = new byte[0]; byte[] target = new byte[0]; instance.init(source.length, target.length); @@ -340,8 +340,8 @@ public class DEZEncoderTest { assertArrayEquals(new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 6, + DEZEncoder.OP_SINGLE_SPLIT, 0, 0, 0x01, 0x23, 0x45, 0x67 @@ -352,7 +352,7 @@ public class DEZEncoderTest { public void testFormatSmallestDefault() { System.out.println("smallest default=4"); - DEZEncoder instance = new DEZEncoder(4); + DEZEncoder instance = new DEZEncoder(4, DEZEncoder.OP_SINGLE_SPLIT); byte[] source = new byte[0]; byte[] target = new byte[0]; instance.init(source.length, target.length); @@ -361,12 +361,36 @@ public class DEZEncoderTest { assertArrayEquals(new byte[]{ DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], - 0, 4, + DEZEncoder.OP_SINGLE_SPLIT, 0, 0, 0x01, 0x23, 0x45, 0x67 }, patch); } + @Test + public void testFormatSplit() { + System.out.println("split non-default"); + int[] splits = {0, 1, 62, 123, 124}; + + for (int split : splits) { + DEZEncoder instance = new DEZEncoder(6, split); + byte[] source = new byte[0]; + byte[] target = new byte[0]; + instance.init(source.length, target.length); + + byte[] patch = instance.toPatch(0x01234567); + + assertArrayEquals(new byte[]{ + DEZEncoder.MAGIC[0], DEZEncoder.MAGIC[1], DEZEncoder.MAGIC[2], DEZEncoder.MAGIC[3], + 6, + (byte) split, + 0, + 0, + 0x01, 0x23, 0x45, 0x67 + }, patch); + } + } + } diff --git a/src/notzed.dez/tests/au/notzed/dez/DEZIT.java b/src/notzed.dez/tests/au/notzed/dez/DEZIT.java index 712bc5f..4130f7b 100644 --- a/src/notzed.dez/tests/au/notzed/dez/DEZIT.java +++ b/src/notzed.dez/tests/au/notzed/dez/DEZIT.java @@ -47,15 +47,41 @@ public class DEZIT { public void tearDown() { } + final static byte[] sourceData = "the rains in Spain fall mainly on the plains".getBytes(); + final static byte[] targetData = "plain breads in Spain aids your dames brains".getBytes(); + + byte[] expand(byte[] s, int len) { + byte[] data = new byte[len]; + for (int i = 0; i < data.length; i += s.length) { + int left = Math.min(s.length, data.length - i); + System.arraycopy(s, 0, data, i, left); + } + return data; + } + + /* + Modifies s so that is has umodified blocks of length n bracketed + by changed, unique blocks of size 'size' + */ + void twiddle(byte[] s, int n, int size) { + int off = 0; + + for (int i = n; i < s.length; i += n) { + for (int j = 0; i < s.length && j < size; j++) + s[i++] += off; + off += 1; + } + } + @Test public void testSmallest() throws Exception { System.out.println("decode/encode smallest changing"); - byte[] source = "the rains in Spain fall mainly on the plains".getBytes(); - byte[] target = "plain breads in Spain aids your dames brains".getBytes(); + byte[] source = sourceData; + byte[] target = targetData; for (int smallest = 4; smallest <= 16; smallest++) { - DEZEncoder de = new DEZEncoder(smallest); + DEZEncoder de = new DEZEncoder(smallest, DEZEncoder.OP_SINGLE_SPLIT); ByteMatcherHash matcher = new ByteMatcherHash(4, smallest, source, 1, target); byte[] patch = ByteDeltaEncoder.toDiff(matcher, de); DEZDecoder dd = new DEZDecoder(source, patch); @@ -65,6 +91,54 @@ public class DEZIT { } } + @Test + public void testSplit() throws Exception { + System.out.println("decode/encode split changing"); + int[] splits = {0, 1, 2, 3, 4, 62, 120, 121, 122, 123, 124}; + + byte[] source = expand(sourceData, 65536); + byte[] target = expand(targetData, 65536); + + for (int split : splits) { + for (int smallest = 4; smallest <= 16; smallest++) { + DEZEncoder de = new DEZEncoder(smallest, split); + ByteMatcherHash matcher = new ByteMatcherHash(4, smallest, source, 1, target); + byte[] patch = ByteDeltaEncoder.toDiff(matcher, de); + DEZDecoder dd = new DEZDecoder(source, patch); + byte[] result = dd.decode(); + + assertArrayEquals(result, target); + } + } + } + + @Test + public void testSplitSame() throws Exception { + System.out.println("decode/encode edge cases (long test)"); + int[] splits = {0, 1, 2, 3, 4, 62, 120, 121, 122, 123, 124}; + int[] ns = {2, 3, 4, 126, 127, 128, 16383, 16384, 16385}; + int[] sizes = {1, 2, 7, 8, 9, 123, 124, 125, 255, 700}; + + byte[] source = expand(sourceData, 65536); + + for (int n : ns) { + for (int size : sizes) { + byte[] target = source.clone(); + twiddle(target, n, size); + for (int split : splits) { + int smallest = 4; + DEZEncoder de = new DEZEncoder(smallest, split); + ByteMatcherHash matcher = new ByteMatcherHash(4, smallest, source, 1, target); + byte[] patch = ByteDeltaEncoder.toDiff(matcher, de); + DEZDecoder dd = new DEZDecoder(source, patch); + byte[] result = dd.decode(); + + assertArrayEquals(result, target); + } + } + } + } + @Test public void testEmptySource() throws Exception { System.out.println("empty source"); @@ -73,7 +147,7 @@ public class DEZIT { byte[] target = "plain breads in Spain aids your dames brains".getBytes(); for (int smallest = 4; smallest <= 16; smallest++) { - DEZEncoder de = new DEZEncoder(smallest); + DEZEncoder de = new DEZEncoder(smallest, DEZEncoder.OP_SINGLE_SPLIT); ByteMatcherHash matcher = new ByteMatcherHash(4, smallest, source, 1, target); byte[] patch = ByteDeltaEncoder.toDiff(matcher, de); DEZDecoder dd = new DEZDecoder(source, patch); @@ -91,7 +165,7 @@ public class DEZIT { byte[] target = "".getBytes(); for (int smallest = 4; smallest <= 16; smallest++) { - DEZEncoder de = new DEZEncoder(smallest); + DEZEncoder de = new DEZEncoder(smallest, DEZEncoder.OP_SINGLE_SPLIT); ByteMatcherHash matcher = new ByteMatcherHash(4, smallest, source, 1, target); byte[] patch = ByteDeltaEncoder.toDiff(matcher, de); DEZDecoder dd = new DEZDecoder(source, patch); @@ -108,15 +182,13 @@ public class DEZIT { byte[] source = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".getBytes(); byte[] target = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".getBytes(); - for (int smallest = 4; smallest <= 16; smallest++) { - DEZEncoder de = new DEZEncoder(smallest); - ByteMatcherHash matcher = new ByteMatcherHash(4, smallest, source, 1, target); - byte[] patch = ByteDeltaEncoder.toDiff(matcher, de); - DEZDecoder dd = new DEZDecoder(source, patch); - byte[] result = dd.decode(); + DEZEncoder de = new DEZEncoder(); + ByteMatcherHash matcher = new ByteMatcherHash(4, 4, source, 1, target); + byte[] patch = ByteDeltaEncoder.toDiff(matcher, de); + DEZDecoder dd = new DEZDecoder(source, patch); + byte[] result = dd.decode(); - assertArrayEquals(result, target); - } + assertArrayEquals(result, target); } @Test @@ -127,7 +199,7 @@ public class DEZIT { byte[] target = "plain breads in Spain aids your dames brains".getBytes(); for (int pos = 1; pos <= 4; pos++) { - DEZEncoder de = new DEZEncoder(4); + DEZEncoder de = new DEZEncoder(4, DEZEncoder.OP_SINGLE_SPLIT); ByteMatcherHash matcher = new ByteMatcherHash(4, 4, source, 1, target); byte[] patch = ByteDeltaEncoder.toDiff(matcher, de); -- 2.39.5