From 30bf0f51335e87812ffeb54e9437f0b6a1514d67 Mon Sep 17 00:00:00 2001 From: "Juan J. Martinez" Date: Tue, 6 Sep 2022 07:37:20 +0100 Subject: Updated rasm to 1.7 --- tools/rasm/decrunch/aplib_z80_todo.asm | 190 --------------------- tools/rasm/decrunch/dzx0_fast.asm | 237 +++++++++++++++++++++++++ tools/rasm/decrunch/dzx0_standard.asm | 64 +++++++ tools/rasm/decrunch/dzx0_standard_back.asm | 65 +++++++ tools/rasm/decrunch/dzx0_turbo_back.asm | 101 +++++++++++ tools/rasm/decrunch/dzx7_turbo.asm | 4 +- tools/rasm/decrunch/exomizer3megachur.asm | 210 ----------------------- tools/rasm/decrunch/lz48decrunch_v006.asm | 113 ------------ tools/rasm/decrunch/lz48decrunch_v006b.asm | 78 +++++++++ tools/rasm/decrunch/unaplib.asm | 190 +++++++++++++++++++++ tools/rasm/decrunch/unaplib_fast.asm | 266 +++++++++++++++++++++++++++++ tools/rasm/decrunch/unlzsa1_fast.asm | 204 ++++++++++++++++++++++ tools/rasm/decrunch/unlzsa2_fast.asm | 189 ++++++++++++++++++++ 13 files changed, 1396 insertions(+), 515 deletions(-) delete mode 100644 tools/rasm/decrunch/aplib_z80_todo.asm create mode 100644 tools/rasm/decrunch/dzx0_fast.asm create mode 100644 tools/rasm/decrunch/dzx0_standard.asm create mode 100644 tools/rasm/decrunch/dzx0_standard_back.asm create mode 100644 tools/rasm/decrunch/dzx0_turbo_back.asm delete mode 100644 tools/rasm/decrunch/exomizer3megachur.asm delete mode 100644 tools/rasm/decrunch/lz48decrunch_v006.asm create mode 100644 tools/rasm/decrunch/lz48decrunch_v006b.asm create mode 100644 tools/rasm/decrunch/unaplib.asm create mode 100644 tools/rasm/decrunch/unaplib_fast.asm create mode 100644 tools/rasm/decrunch/unlzsa1_fast.asm create mode 100755 tools/rasm/decrunch/unlzsa2_fast.asm (limited to 'tools/rasm/decrunch') diff --git a/tools/rasm/decrunch/aplib_z80_todo.asm b/tools/rasm/decrunch/aplib_z80_todo.asm deleted file mode 100644 index 6843a14..0000000 --- a/tools/rasm/decrunch/aplib_z80_todo.asm +++ /dev/null @@ -1,190 +0,0 @@ -;Z80 Version by Dan Weiss -;Call 
depack. -;hl = source -;de = dest - -ap_bits: .db 0 -ap_byte: .db 0 -lwm: .db 0 -r0: .dw 0 - -ap_getbit: - push bc - ld bc,(ap_bits) - rrc c - jr nc,ap_getbit_continue - ld b,(hl) - inc hl -ap_getbit_continue: - ld a,c - and b - ld (ap_bits),bc - pop bc - ret - -ap_getbitbc: ;doubles BC and adds the read bit - sla c - rl b - call ap_getbit - ret z - inc bc - ret - -ap_getgamma: - ld bc,1 -ap_getgammaloop: - call ap_getbitbc - call ap_getbit - jr nz,ap_getgammaloop - ret - - -depack: - ;hl = source - ;de = dest - ldi - xor a - ld (lwm),a - inc a - ld (ap_bits),a - -aploop: - call ap_getbit - jp z, apbranch1 - call ap_getbit - jr z, apbranch2 - call ap_getbit - jr z, apbranch3 - ;LWM = 0 - xor a - ld (lwm),a - ;get an offset - ld bc,0 - call ap_getbitbc - call ap_getbitbc - call ap_getbitbc - call ap_getbitbc - ld a,b - or c - jr nz,apbranch4 - xor a ;write a 0 - ld (de),a - inc de - jr aploop -apbranch4: - ex de,hl ;write a previous bit (1-15 away from dest) - push hl - sbc hl,bc - ld a,(hl) - pop hl - ld (hl),a - inc hl - ex de,hl - jr aploop -apbranch3: - ;use 7 bit offset, length = 2 or 3 - ;if a zero is encountered here, it's EOF - ld c,(hl) - inc hl - rr c - ret z - ld b,2 - jr nc,ap_dont_inc_b - inc b -ap_dont_inc_b: - ;LWM = 1 - ld a,1 - ld (lwm),a - - push hl - ld a,b - ld b,0 - ;R0 = c - ld (r0),bc - ld h,d - ld l,e - or a - sbc hl,bc - ld c,a - ldir - pop hl - jr aploop -apbranch2: - ;use a gamma code * 256 for offset, another gamma code for length - call ap_getgamma - dec bc - dec bc - ld a,(lwm) - or a - jr nz,ap_not_lwm - ;bc = 2? - ld a,b - or c - jr nz,ap_not_zero_gamma - ;if gamma code is 2, use old r0 offset, and a new gamma code for length - call ap_getgamma - push hl - ld h,d - ld l,e - push bc - ld bc,(r0) - sbc hl,bc - pop bc - ldir - pop hl - jr ap_finishup - -ap_not_zero_gamma: - dec bc -ap_not_lwm: - ;do I even need this code? 
- ;bc=bc*256+(hl), lazy 16bit way - ld b,c - ld c,(hl) - inc hl - ld (r0),bc - push bc - call ap_getgamma - ex (sp),hl - ;bc = len, hl=offs - push de - ex de,hl - ;some comparison junk for some reason - ld hl,31999 - or a - sbc hl,de - jr nc,skip1 - inc bc -skip1: - ld hl,1279 - or a - sbc hl,de - jr nc,skip2 - inc bc -skip2: - ld hl,127 - or a - sbc hl,de - jr c,skip3 - inc bc - inc bc -skip3: - ;bc = len, de = offs, hl=junk - pop hl - push hl - or a - sbc hl,de - pop de - ;hl=dest-offs, bc=len, de = dest - ldir - pop hl -ap_finishup: - ld a,1 - ld (lwm),a - jp aploop - -apbranch1: - ldi - xor a - ld (lwm),a - jp aploop diff --git a/tools/rasm/decrunch/dzx0_fast.asm b/tools/rasm/decrunch/dzx0_fast.asm new file mode 100644 index 0000000..55f4388 --- /dev/null +++ b/tools/rasm/decrunch/dzx0_fast.asm @@ -0,0 +1,237 @@ +; +; Speed-optimized ZX0 decompressor by spke (187 bytes) +; +; ver.00 by spke (27/01-23/03/2021, 191 bytes) +; ver.01 by spke (24/03/2021, 193(+2) bytes - fixed a bug in the initialization) +; ver.01patch2 by uniabis (25/03/2021, 191(-2) bytes - fixed a bug with elias over 8bits) +; ver.01patch9 by uniabis (10/09/2021, 187(-4) bytes - support for new v2 format) +; +; Original ZX0 decompressors were written by Einar Saukas +; +; This decompressor was written on the basis of "Standard" decompressor by +; Einar Saukas and optimized for speed by spke. This decompressor is +; about 5% faster than the "Turbo" decompressor, which is 128 bytes long. +; It has about the same speed as the 412 bytes version of the "Mega" decompressor. +; +; The decompressor uses AF, BC, DE, HL and IX and relies upon self-modified code. 
+; +; The decompression is done in the standard way: +; +; ld hl,FirstByteOfCompressedData +; ld de,FirstByteOfMemoryForDecompressedData +; call DecompressZX0 +; +; Of course, ZX0 compression algorithms are (c) 2021 Einar Saukas, +; see https://github.com/einar-saukas/ZX0 for more information +; +; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. 
+ +macro DecompressZX0 + + ld ix, @CopyMatch1 + ld bc, $ffff + ld (@PrevOffset+1), bc ; default offset is -1 + inc bc + ld a, $80 + jr @RunOfLiterals ; BC is assumed to contains 0 most of the time + +@ShorterOffsets: + ld b, $ff ; the top byte of the offset is always $FF + ld c, (hl) + inc hl + rr c + ld (@PrevOffset+1), bc + jr nc, @LongerMatch + +@CopyMatch2: ; the case of matches with len=2 + ld bc, 2 + + ; the faster match copying code +@CopyMatch1: + push hl ; preserve source + +@PrevOffset: + ld hl, $ffff ; restore offset (default offset is -1) + add hl, de ; HL = dest - offset + ldir + pop hl ; restore source + + ; after a match you can have either + ; 0 + = run of literals, or + ; 1 + + [7-bits of offset lsb + 1-bit of length] + = another match +@AfterMatch1: + add a, a + jr nc, @RunOfLiterals + +@UsualMatch: ; this is the case of usual match+offset + add a, a + jr nc, @LongerOffets + jr nz, @ShorterOffsets ; NZ after NC == "confirmed C" + + ld a, (hl) ; reload bits + inc hl + rla + + jr c, @ShorterOffsets + +@LongerOffets: + ld c, $fe + + add a, a ; inline read gamma + rl c + add a, a + jr nc, $-4 + + call z, @ReloadReadGamma + +@ProcessOffset: + + inc c + ret z ; end-of-data marker (only checked for longer offsets) + rr c + ld b, c + ld c, (hl) + inc hl + rr c + ld (@PrevOffset+1), bc + + ; lowest bit is the first bit of the gamma code for length + jr c, @CopyMatch2 + +@LongerMatch: + ld bc, 1 + + add a, a ; inline read gamma + rl c + add a, a + jr nc, $-4 + + call z,@ReloadReadGamma + +@CopyMatch3: + push hl ; preserve source + ld hl, (@PrevOffset+1) ; restore offset + add hl, de ; HL = dest - offset + + ; because BC>=3-1, we can do 2 x LDI safely + ldi + ldir + inc c + ldi + pop hl ; restore source + + ; after a match you can have either + ; 0 + = run of literals, or + ; 1 + + [7-bits of offset lsb + 1-bit of length] + = another match +@AfterMatch3: + add a, a + jr c, @UsualMatch + +@RunOfLiterals: + inc c + add a, a + jr nc, @LongerRun + jr nz, 
@CopyLiteral ; NZ after NC == "confirmed C" + + ld a, (hl) ; reload bits + inc hl + rla + + jr c, @CopyLiteral + +@LongerRun: + add a, a ; inline read gamma + rl c + add a, a + jr nc, $-4 + + jr nz, @CopyLiterals + + ld a, (hl) ; reload bits + inc hl + rla + + call nc, @ReadGammaAligned + +@CopyLiterals: + ldi + +@CopyLiteral: + ldir + + ; after a literal run you can have either + ; 0 + = match using a repeated offset, or + ; 1 + + [7-bits of offset lsb + 1-bit of length] + = another match + add a, a + jr c, @UsualMatch + +@RepMatch: + inc c + add a, a + jr nc, @LongerRepMatch + jr nz, @CopyMatch1 ; NZ after NC == "confirmed C" + + ld a, (hl) ; reload bits + inc hl + rla + + jr c, @CopyMatch1 + +@LongerRepMatch: + add a, a ; inline read gamma + rl c + add a, a + jr nc, $-4 + + jp nz, @CopyMatch1 + + ; this is a crafty equivalent of CALL ReloadReadGamma : JP CopyMatch1 + push ix + + ; the subroutine for reading the remainder of the partly read Elias gamma code. + ; it has two entry points: ReloadReadGamma first refills the bit reservoir in A, + ; while ReadGammaAligned assumes that the bit reservoir has just been refilled. 
+@ReloadReadGamma: + ld a, (hl) ; reload bits + inc hl + rla + + ret c +@ReadGammaAligned: + add a, a + rl c + add a, a + ret c + add a, a + rl c + add a, a +@ReadingLongGamma: ; this loop does not need unrolling, as it does not get much use anyway + ret c + add a, a + rl c + rl b + add a, a + jr nz, @ReadingLongGamma + + ld a, (hl) ; reload bits + inc hl + rla + jr @ReadingLongGamma +mend + diff --git a/tools/rasm/decrunch/dzx0_standard.asm b/tools/rasm/decrunch/dzx0_standard.asm new file mode 100644 index 0000000..6525c8b --- /dev/null +++ b/tools/rasm/decrunch/dzx0_standard.asm @@ -0,0 +1,64 @@ +; ----------------------------------------------------------------------------- +; ZX0 decoder by Einar Saukas & Urusergi +; "Standard" version (68 bytes only) +; ----------------------------------------------------------------------------- +; Parameters: +; HL: source address (compressed data) +; DE: destination address (decompressing) +; ----------------------------------------------------------------------------- + +macro dzx0_standard + ld bc, $ffff ; preserve default offset 1 + push bc + inc bc + ld a, $80 +@dzx0s_literals: + call @dzx0s_elias ; obtain length + ldir ; copy literals + add a, a ; copy from last offset or new offset? + jr c, @dzx0s_new_offset + call @dzx0s_elias ; obtain length +@dzx0s_copy: + ex (sp), hl ; preserve source, restore offset + push hl ; preserve offset + add hl, de ; calculate destination - offset + ldir ; copy from offset + pop hl ; restore offset + ex (sp), hl ; preserve offset, restore source + add a, a ; copy from literals or new offset? 
+ jr nc, @dzx0s_literals +@dzx0s_new_offset: + pop bc ; discard last offset + ld c, $fe ; prepare negative offset + call @dzx0s_elias_loop ; obtain offset MSB + inc c + ret z ; check end marker + ld b, c + ld c, (hl) ; obtain offset LSB + inc hl + rr b ; last offset bit becomes first length bit + rr c + push bc ; preserve new offset + ld bc, 1 ; obtain length + call nc, @dzx0s_elias_backtrack + inc bc + jr @dzx0s_copy +@dzx0s_elias: + inc c ; interlaced Elias gamma coding +@dzx0s_elias_loop: + add a, a + jr nz, @dzx0s_elias_skip + ld a, (hl) ; load another group of 8 bits + inc hl + rla +@dzx0s_elias_skip: + ret c +@dzx0s_elias_backtrack: + add a, a + rl c + rl b + jr @dzx0s_elias_loop +mend +; ----------------------------------------------------------------------------- + + diff --git a/tools/rasm/decrunch/dzx0_standard_back.asm b/tools/rasm/decrunch/dzx0_standard_back.asm new file mode 100644 index 0000000..3da94bd --- /dev/null +++ b/tools/rasm/decrunch/dzx0_standard_back.asm @@ -0,0 +1,65 @@ +; ----------------------------------------------------------------------------- +; ZX0 decoder by Einar Saukas +; "Standard" version (69 bytes only) - BACKWARDS VARIANT +; ----------------------------------------------------------------------------- +; Parameters: +; HL: last source address (compressed data) +; DE: last destination address (decompressing) +; ----------------------------------------------------------------------------- + +Macro dzx0_standard_back + ld bc, 1 ; preserve default offset 1 + push bc + ld a, $80 +@dzx0sb_literals: + call @dzx0sb_elias ; obtain length + lddr ; copy literals + inc c + add a, a ; copy from last offset or new offset? 
+ jr c, @dzx0sb_new_offset + call @dzx0sb_elias ; obtain length +@dzx0sb_copy: + ex (sp), hl ; preserve source, restore offset + push hl ; preserve offset + add hl, de ; calculate destination - offset + lddr ; copy from offset + inc c + pop hl ; restore offset + ex (sp), hl ; preserve offset, restore source + add a, a ; copy from literals or new offset? + jr nc, @dzx0sb_literals +@dzx0sb_new_offset: + inc sp ; discard last offset + inc sp + call @dzx0sb_elias ; obtain offset MSB + dec b + ret z ; check end marker + dec c ; adjust for positive offset + ld b, c + ld c, (hl) ; obtain offset LSB + dec hl + srl b ; last offset bit becomes first length bit + rr c + inc bc + push bc ; preserve new offset + ld bc, 1 ; obtain length + call c, @dzx0sb_elias_backtrack + inc bc + jr @dzx0sb_copy +@dzx0sb_elias_backtrack: + add a, a + rl c + rl b +@dzx0sb_elias: + add a, a ; inverted interlaced Elias gamma coding + jr nz, @dzx0sb_elias_skip + ld a, (hl) ; load another group of 8 bits + dec hl + rla +@dzx0sb_elias_skip: + jr c, @dzx0sb_elias_backtrack + ret +mend +; ----------------------------------------------------------------------------- + + diff --git a/tools/rasm/decrunch/dzx0_turbo_back.asm b/tools/rasm/decrunch/dzx0_turbo_back.asm new file mode 100644 index 0000000..d009e92 --- /dev/null +++ b/tools/rasm/decrunch/dzx0_turbo_back.asm @@ -0,0 +1,101 @@ +; ----------------------------------------------------------------------------- +; ZX0 decoder by Einar Saukas & introspec +; "Turbo" version (126 bytes, 21% faster) - BACKWARDS VARIANT +; ----------------------------------------------------------------------------- +; Parameters: +; HL: last source address (compressed data) +; DE: last destination address (decompressing) +; ----------------------------------------------------------------------------- + +macro dzx0_turbo_back + ld bc, 1 ; preserve default offset 1 + ld (@dzx0tb_last_offset+1), bc + ld a, $80 + jr @dzx0tb_literals +@dzx0tb_new_offset: + add a, a ; obtain 
offset MSB + call c, @dzx0tb_elias + dec b + ret z ; check end marker + dec c ; adjust for positive offset + ld b, c + ld c, (hl) ; obtain offset LSB + dec hl + srl b ; last offset bit becomes first length bit + rr c + inc bc + ld (@dzx0tb_last_offset+1), bc ; preserve new offset + ld bc, 1 ; obtain length + call c, @dzx0tb_elias_loop + inc bc +@dzx0tb_copy: + push hl ; preserve source +@dzx0tb_last_offset: + ld hl, 0 ; restore offset + add hl, de ; calculate destination - offset + lddr ; copy from offset + inc c + pop hl ; restore source + add a, a ; copy from literals or new offset? + jr c, @dzx0tb_new_offset +@dzx0tb_literals: + add a, a ; obtain length + call c, @dzx0tb_elias + lddr ; copy literals + inc c + add a, a ; copy from last offset or new offset? + jr c, @dzx0tb_new_offset + add a, a ; obtain length + call c, @dzx0tb_elias + jp @dzx0tb_copy +@dzx0tb_elias_loop: + add a, a + rl c + add a, a + ret nc +@dzx0tb_elias: + jp nz, @dzx0tb_elias_loop ; inverted interlaced Elias gamma coding + ld a, (hl) ; load another group of 8 bits + dec hl + rla + ret nc + add a, a + rl c + add a, a + ret nc + add a, a + rl c + add a, a + ret nc + add a, a + rl c + add a, a + ret nc +@dzx0tb_elias_reload: + add a, a + rl c + rl b + add a, a + ld a, (hl) ; load another group of 8 bits + dec hl + rla + ret nc + add a, a + rl c + rl b + add a, a + ret nc + add a, a + rl c + rl b + add a, a + ret nc + add a, a + rl c + rl b + add a, a + jr c, @dzx0tb_elias_reload + ret +; ----------------------------------------------------------------------------- +mend + diff --git a/tools/rasm/decrunch/dzx7_turbo.asm b/tools/rasm/decrunch/dzx7_turbo.asm index 779ced5..cb66be5 100644 --- a/tools/rasm/decrunch/dzx7_turbo.asm +++ b/tools/rasm/decrunch/dzx7_turbo.asm @@ -8,7 +8,7 @@ ; ----------------------------------------------------------------------------- dzx7_turbo: - ld a, $80 + ld a, #80 dzx7t_copy_byte_loop: ldi ; copy literal byte dzx7t_main_loop: @@ -42,7 +42,7 @@ 
dzx7t_len_value_start: ; determine offset ld e, (hl) ; load offset flag (1 bit) + offset value (7 bits) inc hl - defb $cb, $33 ; opcode for undocumented instruction "SLL E" aka "SLS E" + defb #cb, #33 ; opcode for undocumented instruction "SLL E" aka "SLS E" jr nc, dzx7t_offset_end ; if offset flag is set, load 4 extra bits add a, a ; check next bit call z, dzx7t_load_bits ; no more bits left? diff --git a/tools/rasm/decrunch/exomizer3megachur.asm b/tools/rasm/decrunch/exomizer3megachur.asm deleted file mode 100644 index ea1973e..0000000 --- a/tools/rasm/decrunch/exomizer3megachur.asm +++ /dev/null @@ -1,210 +0,0 @@ -;Exomizer 2 Z80 decoder -;Copyright (C) 2008-2016 by Jaime Tejedor Gomez (Metalbrain) -; -;Optimized by Antonio Villena and Urusergi (169 bytes) -; -;Compression algorithm by Magnus Lind -; -; This depacker is free software; you can redistribute it and/or -; modify it under the terms of the GNU Lesser General Public -; License as published by the Free Software Foundation; either -; version 2.1 of the License, or (at your option) any later version. -; -; This library is distributed in the hope that it will be useful, -; but WITHOUT ANY WARRANTY; without even the implied warranty of -; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -; Lesser General Public License for more details. -; -; You should have received a copy of the GNU Lesser General Public -; License along with this library; if not, write to the Free Software -; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -; -; -;input- hl=compressed data start -; de=uncompressed destination start -; -; you may change exo_mapbasebits to point to any free buffer -; -;ATTENTION! -;A huge speed boost (around 14%) can be gained at the cost of only 5 bytes. -;If you want this, replace all instances of "call exo_getbit" with "srl a" followed by -;"call z,exo_getbit", and remove the first two instructions in exo_getbit routine. 
-; --------------------------- -; modified by Megachur in 2018 -; --------------------------- -; hl -> compressed data start -; de -> uncompressed destination start -; --------------------------- - -;EXO_BACKWARD equ 1 -ENABLE_MEXO_GETBIT equ 1 - -list:EXOMIZER_ADDRESS:nolist -; --------------------------- -MACRO MEXO_GETBIT - srl a - jr nz,@1 - ld a,(hl) - IFDEF EXO_BACKWARD - dec hl - ELSE - inc hl - ENDIF - rra -@1 -ENDM - -deexo: - ld iy,exo_mapbasebits+11 - ld a,(hl) - - IFDEF EXO_BACKWARD - dec hl - ELSE - inc hl - ENDIF - - ld b,52 - push de - cp a - -exo_initbits: - ld c,16 - jr nz,exo_get4bits - ld ixl,c - ld de,1 ;DE=b2 - -exo_get4bits: - IFDEF ENABLE_MEXO_GETBIT - MEXO_GETBIT - ELSE - srl a:call z,exo_getbit ;call exo_getbit ;get one bit - ENDIF - rl c - jr nc,exo_get4bits - inc c - push hl - ld hl,1 - ld (iy+41),c ;bits[i]=b1 (and opcode 41 == add hl,hl) - -exo_setbit: - dec c - jr nz,exo_setbit-1 ;jump to add hl,hl instruction - ld (iy-11),e - ld (iy+93),d ;base[i]=b2 - add hl,de - ex de,hl - inc iy - pop hl - dec ixl - djnz exo_initbits - pop de - jr exo_mainloop - -exo_literalrun: - ld e,c ;DE=1 - -exo_getbits: - dec b - ret z - -exo_getbits1: - IFDEF ENABLE_MEXO_GETBIT - MEXO_GETBIT - ELSE - srl a:call z,exo_getbit ;call exo_getbit - ENDIF - rl e - rl d - jr nc,exo_getbits - ld b,d - ld c,e - pop de - -exo_literalcopy: - IFDEF EXO_BACKWARD - lddr - ELSE - ldir - ENDIF -exo_mainloop: - inc c - IFDEF ENABLE_MEXO_GETBIT - MEXO_GETBIT - ELSE - srl a:call z,exo_getbit ;call exo_getbit ;literal? - ENDIF - jr c,exo_literalcopy - ld c,239 -exo_getindex: - IFDEF ENABLE_MEXO_GETBIT - MEXO_GETBIT - ELSE - srl a:call z,exo_getbit ;call exo_getbit - ENDIF - inc c - jr nc,exo_getindex - ret z - push de - ld d,b - jp p,exo_literalrun - ld iy,exo_mapbasebits-229 - call exo_getpair - push de - rlc d - jr nz,exo_dontgo - dec e - ld bc,512+32 ;2 bits,48 offset - jr z,exo_goforit - dec e ;2? 
-exo_dontgo: - ld bc,1024+16 ;4 bits,32 offset - jr z,exo_goforit - ld de,0 - ld c,d ;16 offset -exo_goforit: - call exo_getbits1 - ld iy,exo_mapbasebits+27 - add iy,de - call exo_getpair - pop bc - ex (sp),hl - IFDEF EXO_BACKWARD - ex de,hl - add hl,de - lddr - ELSE - push hl - sbc hl,de - pop de - ldir - ENDIF - pop hl - jr exo_mainloop ;Next! -exo_getpair: - add iy,bc - ld e,d - ld b,(iy+41) - call exo_getbits - ex de,hl - ld c,(iy-11) - ld b,(iy+93) - add hl,bc ;Always clear C flag - ex de,hl - ret - - IFDEF ENABLE_MEXO_GETBIT - ELSE -exo_getbit: -; srl a -; ret nz - ld a,(hl) - inc hl - rra - ret - ENDIF - -exo_mapbasebits: - ds 156,#00 ;tables for bits,baseL,baseH -; --------------------------- -list:EXOMIZER_ADDRESS_LENGTH equ $-EXOMIZER_ADDRESS:nolist \ No newline at end of file diff --git a/tools/rasm/decrunch/lz48decrunch_v006.asm b/tools/rasm/decrunch/lz48decrunch_v006.asm deleted file mode 100644 index 750b571..0000000 --- a/tools/rasm/decrunch/lz48decrunch_v006.asm +++ /dev/null @@ -1,113 +0,0 @@ -; -; LZ48 decrunch -; -; hl compressed data adress -; de output adress of data -; - - -org #8000 - -; CALL #8000,source,destination -di - -; parameters -ld h,(ix+3) -ld l,(ix+2) -ld d,(ix+1) -ld e,(ix+0) - -call LZ48_decrunch - -ei -ret - - - - - -LZ48_decrunch -ldi -ld b,0 - -nextsequence -ld a,(hl) -inc hl -ld lx,a -and #F0 -jr z,lzunpack ; no litteral bytes -rrca -rrca -rrca -rrca - -ld c,a -cp 15 ; more bytes for length? -jr nz,copyliteral - -getadditionallength -ld a,(hl) -inc hl -inc a -jr nz,lengthnext -inc b -dec bc -jr getadditionallength -lengthnext -dec a -add a,c -ld c,a -ld a,b -adc a,0 -ld b,a ; bc=length - -copyliteral -ldir - -lzunpack -ld a,lx -and #F -add 3 -ld c,a -cp 18 ; more bytes for length? 
-jr nz,readoffset - -getadditionallengthbis -ld a,(hl) -inc hl -inc a -jr nz,lengthnextbis -inc b -dec bc -jr getadditionallengthbis -lengthnextbis -dec a -add a,c -ld c,a -ld a,b -adc a,0 -ld b,a ; bc=length - -readoffset -; read encoded offset -ld a,(hl) -inc a -ret z ; LZ48 end with zero offset -inc hl -push hl -ld l,a -ld a,e -sub l -ld l,a -ld a,d -sbc a,0 -ld h,a -; source=dest-copyoffset - -copykey -ldir - -pop hl -jr nextsequence - - diff --git a/tools/rasm/decrunch/lz48decrunch_v006b.asm b/tools/rasm/decrunch/lz48decrunch_v006b.asm new file mode 100644 index 0000000..69162a4 --- /dev/null +++ b/tools/rasm/decrunch/lz48decrunch_v006b.asm @@ -0,0 +1,78 @@ +; +; LZ48 decrunch +; + +; In ; HL=compressed data address +; ; DE=output data address +; Out ; HL last address of compressed data read (you must inc once for LZ48 stream) +; ; DE last address of decrunched data write +1 +; ; BC always 3 +; ; A always zero +; ; IXL undetermined +; ; flags (inc a -> 0) +; Modif ; AF, BC, DE, HL, IXL +LZ48_decrunch + ldi + ld b,0 + +nextsequence + ld a,(hl) + inc hl + cp #10 + jr c,lzunpack ; no literal bytes + ld ixl,a + and #f0 + rrca + rrca + rrca + rrca + + cp 15 ; more bytes for literal length? + jr nz,copyliteral +getadditionallength + ld c,(hl) ; get additional literal length byte + inc hl + add a,c ; compute literal length total + jr nc,lengthNC + inc b +lengthNC + inc c + jr z,getadditionallength ; if last literal length byte was 255, we have more bytes to process +copyliteral + ld c,a + ldir + ld a,ixl + and #F +lzunpack + add 3 + cp 18 ; more bytes for match length? 
+ jr nz,readoffset +getadditionallengthbis + ld c,(hl) ; get additional match length byte + inc hl + add a,c ; compute match length size total + jr nc,lengthNCbis + inc b +lengthNCbis + inc c + jr z,getadditionallengthbis ; if last match length byte was 255, we have more bytes to process + +readoffset + ld c,a +; read encoded offset + ld a,(hl) + inc a + ret z ; LZ48 end with zero offset + inc hl + push hl +; source=dest-copyoffset + ; A != 0 here + neg + ld l,a + ld h,#ff + add hl,de +copykey + ldir + + pop hl + jr nextsequence diff --git a/tools/rasm/decrunch/unaplib.asm b/tools/rasm/decrunch/unaplib.asm new file mode 100644 index 0000000..13c8678 --- /dev/null +++ b/tools/rasm/decrunch/unaplib.asm @@ -0,0 +1,190 @@ +;Z80 Version by Dan Weiss +;Call depack. +;hl = source +;de = dest + +ap_bits: db 0 +ap_byte: db 0 +lwm: db 0 +r0: dw 0 + +ap_getbit: + push bc + ld bc,(ap_bits) + rrc c + jr nc,ap_getbit_continue + ld b,(hl) + inc hl +ap_getbit_continue: + ld a,c + and b + ld (ap_bits),bc + pop bc + ret + +ap_getbitbc: ;doubles BC and adds the read bit + sla c + rl b + call ap_getbit + ret z + inc bc + ret + +ap_getgamma: + ld bc,1 +ap_getgammaloop: + call ap_getbitbc + call ap_getbit + jr nz,ap_getgammaloop + ret + + +depack: + ;hl = source + ;de = dest + ldi + xor a + ld (lwm),a + inc a + ld (ap_bits),a + +aploop: + call ap_getbit + jp z, apbranch1 + call ap_getbit + jr z, apbranch2 + call ap_getbit + jr z, apbranch3 + ;LWM = 0 + xor a + ld (lwm),a + ;get an offset + ld bc,0 + call ap_getbitbc + call ap_getbitbc + call ap_getbitbc + call ap_getbitbc + ld a,b + or c + jr nz,apbranch4 + xor a ;write a 0 + ld (de),a + inc de + jr aploop +apbranch4: + ex de,hl ;write a previous bit (1-15 away from dest) + push hl + sbc hl,bc + ld a,(hl) + pop hl + ld (hl),a + inc hl + ex de,hl + jr aploop +apbranch3: + ;use 7 bit offset, length = 2 or 3 + ;if a zero is encountered here, it's EOF + ld c,(hl) + inc hl + rr c + ret z + ld b,2 + jr nc,ap_dont_inc_b + inc b +ap_dont_inc_b: 
+ ;LWM = 1 + ld a,1 + ld (lwm),a + + push hl + ld a,b + ld b,0 + ;R0 = c + ld (r0),bc + ld h,d + ld l,e + or a + sbc hl,bc + ld c,a + ldir + pop hl + jr aploop +apbranch2: + ;use a gamma code * 256 for offset, another gamma code for length + call ap_getgamma + dec bc + dec bc + ld a,(lwm) + or a + jr nz,ap_not_lwm + ;bc = 2? + ld a,b + or c + jr nz,ap_not_zero_gamma + ;if gamma code is 2, use old r0 offset, and a new gamma code for length + call ap_getgamma + push hl + ld h,d + ld l,e + push bc + ld bc,(r0) + sbc hl,bc + pop bc + ldir + pop hl + jr ap_finishup + +ap_not_zero_gamma: + dec bc +ap_not_lwm: + ;do I even need this code? + ;bc=bc*256+(hl), lazy 16bit way + ld b,c + ld c,(hl) + inc hl + ld (r0),bc + push bc + call ap_getgamma + ex (sp),hl + ;bc = len, hl=offs + push de + ex de,hl + ;some comparison junk for some reason + ld hl,31999 + or a + sbc hl,de + jr nc,skip1 + inc bc +skip1: + ld hl,1279 + or a + sbc hl,de + jr nc,skip2 + inc bc +skip2: + ld hl,127 + or a + sbc hl,de + jr c,skip3 + inc bc + inc bc +skip3: + ;bc = len, de = offs, hl=junk + pop hl + push hl + or a + sbc hl,de + pop de + ;hl=dest-offs, bc=len, de = dest + ldir + pop hl +ap_finishup: + ld a,1 + ld (lwm),a + jp aploop + +apbranch1: + ldi + xor a + ld (lwm),a + jp aploop diff --git a/tools/rasm/decrunch/unaplib_fast.asm b/tools/rasm/decrunch/unaplib_fast.asm new file mode 100644 index 0000000..47ad16b --- /dev/null +++ b/tools/rasm/decrunch/unaplib_fast.asm @@ -0,0 +1,266 @@ +; +; Speed-optimized ApLib decompressor by spke (ver.04 spring 2020, 236 bytes) +; +; The original Z80 decompressors for ApLib were written by Dan Weiss (Dwedit), +; then tweaked by Francisco Javier Pena Pareja (utopian), +; and optimized by Jaime Tejedor Gomez (Metalbrain) and Antonio Villena. +; +; This is a new "implicit state" decompressor heavily optimized for speed by spke. +; (It is 11 bytes shorter and 14% faster than the previously fastest +; 247b decompressor by Metalbrain and Antonio Villena.) 
+; +; ver.00 by spke (21/08/2018-01/09/2018, 244 bytes, an edit of the existing 247b decompressor); +; ver.01 by spke (12-13/11/2018, 234(-10) bytes, +3% speed using the state machine for LWM); +; ver.02 by spke (06/08/2019, +1% speed); +; ver.03 by spke (27/08/2019, 236(+2) bytes, +1% speed using partly expanded LDIR); +; ver.04 by spke (spring 2020, added full revision history and support for long offsets) +; +; The data must be compressed using any compressor for ApLib capable of generating raw data. +; At present, the two best available compressors are: +; +; "APC" by Sven-Ake Dahl: https://github.com/svendahl/cap or +; "apultra" by Emmanuel Marty: https://github.com/emmanuel-marty/apultra +; +; The compression can be done as follows: +; +; apc.exe e <sourcefile> <compressedfile> +; or +; apultra.exe <sourcefile> <compressedfile> +; +; A decent compressor was written by r57shell (although it is worse than the compressors above): +; http://gendev.spritesmind.net/forum/viewtopic.php?p=32548#p32548 +; The use of the official ApLib compressor by Joergen Ibsen is not recommended. +; +; The decompression is done in the standard way: +; +; ld hl,FirstByteOfCompressedData +; ld de,FirstByteOfMemoryForDecompressedData +; call DecompressApLib +; +; The decompressor modifies AF, AF', BC, DE, HL, IXH, IY. +; (However, note that the option "AllowSelfmodifyingCode" removes the dependency on IY.) +; +; Of course, ApLib compression algorithms are (c) 1998-2014 Joergen Ibsen, +; see http://www.ibsensoftware.com/ for more information +; +; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. 
The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. + +; DEFINE SupportLongOffsets ; +4 bytes for long offset support. slows decompression down by 1%, but may be needed to decompress files >=32K + +MACRO ApUnpack + + ld a,128 : jr @LWM0_CASE0 + +;================================================================================================================== +;================================================================================================================== +;================================================================================================================== + +@LWM0: ;LWM = 0 (LWM stands for "Last Was Match"; 0 means that the previous command was not a match) + +@LWM0_ReloadByteC0 ld a,(hl) : inc hl : rla + jr c,@LWM0_Check2ndBit + +; +; case "0"+BYTE: copy a single literal + +@LWM0_CASE0: ldi ; first byte is always copied as literal + +; +; main decompressor loop + +@LWM0_MainLoop: add a : jr z,@LWM0_ReloadByteC0 : jr nc,@LWM0_CASE0 ; "0"+BYTE = copy literal +@LWM0_Check2ndBit add a : call z,@ReloadByte : jr nc,@LWM0_CASE10 ; "10"+gamma(offset/256)+BYTE+gamma(length) = the main matching mechanism + add a : call z,@ReloadByte : jp c,@LWM1_CASE111 ; "110"+[oooooool] = matched 2-3 bytes with a small offset + +; +; branch "110"+[oooooool]: copy two or three bytes (bit "l") with the offset -1..-127 (bits "ooooooo"), or stop + +@LWM0_CASE110: ; "use 7 bit offset, length = 2 or 3" + ; "if a zero is found here, it's EOF" + ld c,(hl) : rr c : ret z ; process EOF + inc hl + ld b,0 + + ld iyl,c : ld iyh,b ; save offset for future LWMs + + push hl ; save src +
ld h,d : ld l,e ; HL = dest + jr c,@LWM0_LengthIs3 + +@LWM0_LengthIs2 sbc hl,bc ; carry is clear here (JR C above was not taken), so this is a plain HL -= BC + ldi : ldi + jr @LWM0_PreMainLoop + +@LWM0_LengthIs3 or a : sbc hl,bc + ldi : ldi : ldi + jr @LWM0_PreMainLoop + +; +; branch "10"+gamma(offset/256)+BYTE+gamma(length): the main matching mechanism + +@LWM0_CASE10: ; "use a gamma code * 256 for offset, another gamma code for length" + call @GetGammaCoded + + ; the original decompressor contains + ; + ; if ((LWM == 0) && (offs == 2)) { ... } + ; else { + ; if (LWM == 0) { offs -= 3; } + ; else { offs -= 2; } + ; } + ; + ; so, the idea here is to use the fact that GetGammaCoded returns (offset/256)+2, + ; and to split the first condition by noticing that C-1 can never be zero + dec c : dec c : jr z,@LWM1_KickInLWM + +@LWM0_AfterLWM dec c : ld b,c : ld c,(hl) : inc hl ; BC = offset + + ld iyl,c : ld iyh,b : push bc + + call @GetGammaCoded ; BC = len* + + ex (sp),hl ; HL = offset, (SP) = src + + ; interpretation of length value is offset-dependent: + ; if (offs >= 32000) len++; if (offs >= 1280) len++; if (offs < 128) len+=2; + ; in other words, + ; (1 <= offs < 128) +=2 + ; (128 <= offs < 1280) +=0 + ; (1280 <= offs < 32000) +=1 + ; NB offsets over 32000 need one more check, but other Z80 decompressors seem to ignore it. is it not needed?
+ + ; interpretation of length value is offset-dependent + exa : ld a,h + IFDEF SupportLongOffsets + ; NB offsets over 32000 require an additional check, which is skipped in most + ; Z80 decompressors (seemingly as a performance optimization) + cp 32000>>8 : jr nc,@LWM0_Add2 + ENDIF + cp 5 : jr nc,@LWM0_Add1 + or a : jr nz,@LWM0_Add0 + bit 7,l : jr nz,@LWM0_Add0 +@LWM0_Add2 inc bc +@LWM0_Add1 inc bc +@LWM0_Add0 ; for offs<128 : 4+4+7+7 + 4+7 + 8+7 + 6+6 = 60t + ; for offs>=1280 : 4+4+7+12 + 6 = 33t + ; for 128<=offs<1280 : 4+4+7+7 + 4+12 = 38t OR 4+4+7+7 + 4+7+8+12 = 53t +; dec bc + +@LWM0_CopyMatch: ; this assumes that BC = len, DE = dest, HL = offset (HL becomes dest-offset below) + ; and also that (SP) = src, while having NC + ld a,e : sub l : ld l,a + ld a,d : sbc h +@LWM0_CopyMatchLDH ld h,a : ldi : ldir : exa +@LWM0_PreMainLoop pop hl ; recover src + +;================================================================================================================== +;================================================================================================================== +;================================================================================================================== + +@LWM1: ; LWM = 1 + +; +; main decompressor loop + +@LWM1_MainLoop: add a : jr z,@LWM1_ReloadByteC0 : jr nc,@LWM0_CASE0 ; "0"+BYTE = copy literal +@LWM1_Check2ndBit add a : call z,@ReloadByte : jr nc,@LWM1_CASE10 ; "10"+gamma(offset/256)+BYTE+gamma(length) = the main matching mechanism + add a : call z,@ReloadByte : jr nc,@LWM0_CASE110 ; "110"+[oooooool] = matched 2-3 bytes with a small offset + +; +; case "111"+"oooo": copy a byte with offset -1..-15, or write zero to dest + +@LWM1_CASE111: ld bc,%11100000 + add a : call z,@ReloadByte : rl c ; read short offset (4 bits) + add a : call z,@ReloadByte : rl c ; read short offset (4 bits) + add a : call z,@ReloadByte : rl c ; read short offset (4 bits) + add a : call z,@ReloadByte : rl c ; read short offset (4 bits) + ex de,hl : jr z,@LWM1_WriteZero ;
zero offset means "write zero" (NB: B is zero here) + + ; "write a previous byte (1-15 away from dest)" + push hl ; BC = offset, DE = src, HL = dest + sbc hl,bc ; HL = dest-offset (SBC works because the fourth RL C above shifted out a zero bit, leaving NC) + ld b,(hl) + pop hl + +@LWM1_WriteZero ld (hl),b : ex de,hl + inc de : jp @LWM0_MainLoop ; 10+4*(4+10+8)+4+7 + 11+15+7+10 + 7+4+6+10 = 179t + +@LWM1_ReloadByteC0 ld a,(hl) : inc hl : rla + jp nc,@LWM0_CASE0 + jr @LWM1_Check2ndBit + +; +; branch "10"+gamma(offset/256)+BYTE+gamma(length): the main matching mechanism + +@LWM1_CASE10: ; "use a gamma code * 256 for offset, another gamma code for length" + call @GetGammaCoded + + ; the original decompressor contains + ; + ; if ((LWM == 0) && (offs == 2)) { ... } + ; else { + ; if (LWM == 0) { offs -= 3; } + ; else { offs -= 2; } + ; } + ; + ; so, the idea here is to use the fact that GetGammaCoded returns (offset/256)+2, + ; and to split the first condition by noticing that C-1 can never be zero + dec c : jp @LWM0_AfterLWM + +; +; the re-use of the previous offset (LWM magic) + +@LWM1_KickInLWM: ; "and a new gamma code for length" + call @GetGammaCoded ; BC = len + push hl + exa : ld a,e : sub iyl : ld l,a + ld a,d : sbc iyh + jp @LWM0_CopyMatchLDH + +;================================================================================================================== +;================================================================================================================== +;================================================================================================================== + +; +; interlaced gamma code reader +; x0 -> 1x +; x1y0 -> 1xy +; x1y1z0 -> 1xyz etc +; (technically, this is a 2-based variation of Exp-Golomb-1) + +@GetGammaCoded: ld bc,1 +@ReadGamma add a : jr z,@ReloadByteRG1 + rl c : rl b + add a : jr z,@ReloadByteRG2 + jr c,@ReadGamma : ret + +@ReloadByteRG1 ld a,(hl) : inc hl : rla + rl c : rl b + add a : jr c,@ReadGamma : ret + +@ReloadByteRG2 ld a,(hl) : inc hl :
rla + jr c,@ReadGamma : ret + +; +; pretty usual getbit for mixed datastreams + +@ReloadByte: ld a,(hl) : inc hl : rla : ret + +MEND + diff --git a/tools/rasm/decrunch/unlzsa1_fast.asm b/tools/rasm/decrunch/unlzsa1_fast.asm new file mode 100644 index 0000000..0e2617f --- /dev/null +++ b/tools/rasm/decrunch/unlzsa1_fast.asm @@ -0,0 +1,204 @@ +; +; Speed-optimized LZSA1 decompressor by spke & uniabis (109 bytes) +; +; ver.00 by spke for LZSA 0.5.4 (03-24/04/2019, 134 bytes); +; ver.01 by spke for LZSA 0.5.6 (25/04/2019, 110(-24) bytes, +0.2% speed); +; ver.02 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression); +; ver.03 by uniabis (30/07/2019, 109(-1) bytes, +3.5% speed); +; ver.04 by spke (31/07/2019, small re-organization of macros); +; ver.05 by uniabis (22/08/2019, 107(-2) bytes, same speed); +; ver.06 by spke for LZSA 1.0.7 (27/08/2019, 111(+4) bytes, +2.1% speed); +; ver.07 by spke for LZSA 1.1.0 (25/09/2019, added full revision history); +; ver.08 by spke for LZSA 1.1.2 (22/10/2019, re-organized macros and added an option for unrolled copying of long matches); +; ver.09 by spke for LZSA 1.2.1 (02/01/2020, 109(-2) bytes, same speed) +; +; The data must be compressed using the command line compressor by Emmanuel Marty +; The compression is done as follows: +; +; lzsa.exe -f1 -r <sourcefile> <outfile> +; +; where option -r asks for the generation of raw (frame-less) data. +; +; The decompression is done in the standard way: +; +; ld hl,FirstByteOfCompressedData +; ld de,FirstByteOfMemoryForDecompressedData +; call DecompressLZSA1 +; +; Backward compression is also supported; you can compress files backward using: +; +; lzsa.exe -f1 -r -b <sourcefile> <outfile> +; +; and decompress the resulting files using: +; +; ld hl,LastByteOfCompressedData +; ld de,LastByteOfMemoryForDecompressedData +; call DecompressLZSA1 +; +; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
+; +; Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty, +; see https://github.com/emmanuel-marty/lzsa for more information +; +; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. 
+ +; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+57 bytes) +; DEFINE BACKWARD_DECOMPRESS + + IFNDEF BACKWARD_DECOMPRESS + + MACRO NEXT_HL + inc hl + ENDM + + MACRO ADD_OFFSET + ex de,hl : add hl,de + ENDM + + MACRO COPY1 + ldi + ENDM + + MACRO COPYBC + ldir + ENDM + + ELSE + + MACRO NEXT_HL + dec hl + ENDM + + MACRO ADD_OFFSET + ex de,hl : ld a,e : sub l : ld l,a + ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes + ENDM + + MACRO COPY1 + ldd + ENDM + + MACRO COPYBC + lddr + ENDM + + ENDIF + +macro DecompressLZSA1 + ld b,0 : jr @ReadToken + +@NoLiterals: xor (hl) : NEXT_HL : jp m,@LongOffset ; A = 0 on entry (AND #70 gave zero), so XOR (HL) loads the token and sets the sign flag from its top bit + +@ShortOffset: push de : ld e,(hl) : ld d,#FF + + ; short matches have length 0+3..14+3 + add 3 : cp 15+3 : jr nc,@LongerMatch + + ; placed here this saves a JP per iteration +@CopyMatch: ld c,a +.UseC NEXT_HL : ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src] + ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] + COPY1 : COPY1 : COPYBC ; BC = 0, DE = dest +.popSrc pop hl ; HL = src + +@ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream, + ; where LLL is the number of literals and MMMM is + ; a length of the match that follows after the literals + ld a,(hl) : and #70 : jr z,@NoLiterals + + cp #70 : jr z,@MoreLiterals ; LLL=7 means 7+ literals... + rrca : rrca : rrca : rrca : ld c,a ; LLL<7 means 0..6 literals...
+ + ld a,(hl) : NEXT_HL + COPYBC + + ; the top bit of token is set if the offset contains two bytes + and #8F : jp p,@ShortOffset + +@LongOffset: ; read second byte of the offset + push de : ld e,(hl) : NEXT_HL : ld d,(hl) + add -128+3 : cp 15+3 : jp c,@CopyMatch + + IFNDEF UNROLL_LONG_MATCHES + + ; MMMM=15 indicates a multi-byte match length +@LongerMatch: NEXT_HL : add (hl) : jr nc,@CopyMatch + + ; the codes are designed to overflow; + ; the overflow value 1 means read 1 extra byte + ; and overflow value 0 means read 2 extra bytes +.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,@CopyMatch.UseC +.code0 NEXT_HL : ld b,(hl) + + ; the two-byte match length equal to zero + ; designates the end-of-data marker + ld a,b : or c : jr nz,@CopyMatch.UseC + pop de : ret + + ELSE + + ; MMMM=15 indicates a multi-byte match length +@LongerMatch: NEXT_HL : add (hl) : jr c,@VeryLongMatch + + ld c,a +.UseC NEXT_HL : ex (sp),hl + ADD_OFFSET + COPY1 : COPY1 + + ; this is an unrolled equivalent of LDIR + xor a : sub c + and 32-1 : add a + ld (.jrOffset),a : jr nz,$+2 ; self-modifying: the displacement byte of this JR is patched just before it executes +.jrOffset EQU $-1 +.fastLDIR repeat 32 + COPY1 + rend + jp pe,.fastLDIR + jp @CopyMatch.popSrc + +@VeryLongMatch: ; the codes are designed to overflow; + ; the overflow value 1 means read 1 extra byte + ; and overflow value 0 means read 2 extra bytes +.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,@LongerMatch.UseC +.code0 NEXT_HL : ld b,(hl) + + ; the two-byte match length equal to zero + ; designates the end-of-data marker + ld a,b : or c : jr nz,@LongerMatch.UseC + pop de : ret + + ENDIF + +@MoreLiterals: ; there are three possible situations here + xor (hl) : NEXT_HL : exa ; A = #70 on entry, so A becomes the token with its LLL bits cleared; A and the sign flag are parked in AF' + ld a,7 : add (hl) : jr c,@ManyLiterals + +@CopyLiterals: ld c,a +.UseC NEXT_HL : COPYBC + + exa : jp p,@ShortOffset : jr @LongOffset + +@ManyLiterals: +.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,@CopyLiterals.UseC +.code0 NEXT_HL : ld b,(hl) : jr @CopyLiterals.UseC + +mend + + + diff --git a/tools/rasm/decrunch/unlzsa2_fast.asm
b/tools/rasm/decrunch/unlzsa2_fast.asm new file mode 100755 index 0000000..8c6b5b1 --- /dev/null +++ b/tools/rasm/decrunch/unlzsa2_fast.asm @@ -0,0 +1,189 @@ +; +; Speed-optimized LZSA2 decompressor by spke & uniabis (216 bytes) +; + + DEFINE UNROLL_LONG_MATCHES ; defined here (not commented out): faster decompression of very compressible data (+38 bytes); NOTE(review): the macro below has no IFDEF around its unrolled copy +; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b + + IFNDEF BACKWARD_DECOMPRESS + + MACRO NEXT_HL + inc hl + ENDM + + MACRO ADD_OFFSET + ex de,hl : add hl,de + ENDM + + MACRO COPY1 + ldi + ENDM + + MACRO COPYBC + ldir + ENDM + + ELSE + + MACRO NEXT_HL + dec hl + ENDM + + MACRO ADD_OFFSET + ex de,hl : ld a,e : sub l : ld l,a + ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes + ENDM + + MACRO COPY1 + ldd + ENDM + + MACRO COPYBC + lddr + ENDM + + ENDIF + + +macro DecompressLZSA2 +@lzsa2 + ; A' stores next nibble as %1111.... or assumed to contain trash + ; B is assumed to be 0 + ld b,0 : scf : exa : jr .ReadToken + +.ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,.CopyLiterals + ld c,(hl) : NEXT_HL + ld a,b : ld b,(hl) + jr .NEXTHLuseBC + + +.MoreLiterals: ld b,(hl) : NEXT_HL + scf : exa : jr nc,.noUpdatemoar + + ld a,(hl) : or #F0 : exa + ld a,(hl) : NEXT_HL : or #0F + rrca : rrca : rrca : rrca + +.noUpdatemoar ;sub #F0-3 : cp 15+3 : jr z,ManyLiterals + inc a : jr z,.ManyLiterals : sub #F0-3+1 + +.CopyLiterals: ld c,a : ld a,b : ld b,0 + COPYBC + push de : or a : jp p,.CASE0xx ;: jr CASE1xx + + cp %11000000 : jr c,.CASE10x + +.CASE11x cp %11100000 : jr c,.CASE110 + + ; "111": repeated offset +.CASE111: ld de,ix : jr .MatchLen + + +.Literals0011: jr nz,.MoreLiterals + + ; if "LL" of the byte token is equal to 0, + ; there are no literals to copy +.NoLiterals: or (hl) : NEXT_HL + push de : jp m,.CASE1xx + + ; short (5 or 9 bit long) offsets +.CASE0xx ld d,#FF : cp %01000000 : jr c,.CASE00x + + ; "01x": the case of the 9-bit offset +.CASE01x: cp %01100000 : rl d + +.ReadOffsetE ld e,(hl) : NEXT_HL +
+.SaveOffset: LD ix,de ; keep the offset in IX; the "111" (repeated offset) case reloads DE from it + +.MatchLen: inc a : and %00000111 : jr z,.LongerMatch : inc a + +.CopyMatch: ld c,a +;.useC + ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src] + ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] + COPY1 + COPYBC +.popSrc pop hl + + ; compressed data stream contains records + ; each record begins with the byte token "XYZ|LL|MMM" +.ReadToken: ld a,(hl) : and %00011000 : jp pe,.Literals0011 ; process the cases 00 and 11 separately + + rrca : rrca : rrca + + ld c,a : ld a,(hl) ; token is re-read for further processing +.NEXTHLuseBC NEXT_HL + COPYBC + + ; the token and literals are followed by the offset + push de : or a : jp p,.CASE0xx + +.CASE1xx cp %11000000 : jr nc,.CASE11x + + ; "10x": the case of the 13-bit offset +.CASE10x: ld c,a : exa : jr nc,.noUpdatecase10x + + ld a,(hl) : or #F0 : exa + ld a,(hl) : NEXT_HL : or #0F + rrca : rrca : rrca : rrca + +.noUpdatecase10x ld d,a : ld a,c + cp %10100000 : dec d : rl d : jr .ReadOffsetE + + + + ; "110": 16-bit offset +.CASE110: ld d,(hl) : NEXT_HL : jr .ReadOffsetE + + + + + ; "00x": the case of the 5-bit offset +.CASE00x: ld c,a : exa : jr nc,.noUpdatecase00x + + ld a,(hl) : or #F0 : exa + ld a,(hl) : NEXT_HL : or #0F + rrca : rrca : rrca : rrca + +.noUpdatecase00x ld e,a : ld a,c + cp %00100000 : rl e : jp .SaveOffset + + +.LongerMatch: scf : exa : jr nc,.noUpdatelongermatch + + ld a,(hl) : or #F0 : exa + ld a,(hl) : NEXT_HL : or #0F + rrca : rrca : rrca : rrca + +.noUpdatelongermatch sub #F0-9 : cp 15+9 : jr c,.CopyMatch + + +.LongMatch: add (hl) : NEXT_HL : jr c,.VeryLongMatch + + ld c,a +.useC ex (sp),hl + ADD_OFFSET + COPY1 + + ; this is an unrolled equivalent of LDIR + xor a : sub c + and 32-1 : add a + ld (.jrOffset),a : jr nz,$+2 ; self-modifying: the displacement byte of this JR is patched just before it executes +.jrOffset EQU $-1 +.fastLDIR repeat 32 + COPY1 + rend + jp pe,.fastLDIR + jp .popSrc + +.VeryLongMatch: ld c,(hl) : NEXT_HL + ld b,(hl) : NEXT_HL : jr nz,.useC ; Z/NZ still comes from ADD (HL) in .LongMatch: NZ = copy BC bytes, Z = stop + pop de : ret ; drop the saved dest and return to the caller + +mend + + + + + -- cgit v1.2.3