From 30bf0f51335e87812ffeb54e9437f0b6a1514d67 Mon Sep 17 00:00:00 2001
From: "Juan J. Martinez" <jjm@usebox.net>
Date: Tue, 6 Sep 2022 07:37:20 +0100
Subject: Updated rasm to 1.7

---
 tools/rasm/decrunch/aplib_z80_todo.asm     | 190 ---------------------
 tools/rasm/decrunch/dzx0_fast.asm          | 237 +++++++++++++++++++++++++
 tools/rasm/decrunch/dzx0_standard.asm      |  64 +++++++
 tools/rasm/decrunch/dzx0_standard_back.asm |  65 +++++++
 tools/rasm/decrunch/dzx0_turbo_back.asm    | 101 +++++++++++
 tools/rasm/decrunch/dzx7_turbo.asm         |   4 +-
 tools/rasm/decrunch/exomizer3megachur.asm  | 210 -----------------------
 tools/rasm/decrunch/lz48decrunch_v006.asm  | 113 ------------
 tools/rasm/decrunch/lz48decrunch_v006b.asm |  78 +++++++++
 tools/rasm/decrunch/unaplib.asm            | 190 +++++++++++++++++++++
 tools/rasm/decrunch/unaplib_fast.asm       | 266 +++++++++++++++++++++++++++++
 tools/rasm/decrunch/unlzsa1_fast.asm       | 204 ++++++++++++++++++++++
 tools/rasm/decrunch/unlzsa2_fast.asm       | 189 ++++++++++++++++++++
 13 files changed, 1396 insertions(+), 515 deletions(-)
 delete mode 100644 tools/rasm/decrunch/aplib_z80_todo.asm
 create mode 100644 tools/rasm/decrunch/dzx0_fast.asm
 create mode 100644 tools/rasm/decrunch/dzx0_standard.asm
 create mode 100644 tools/rasm/decrunch/dzx0_standard_back.asm
 create mode 100644 tools/rasm/decrunch/dzx0_turbo_back.asm
 delete mode 100644 tools/rasm/decrunch/exomizer3megachur.asm
 delete mode 100644 tools/rasm/decrunch/lz48decrunch_v006.asm
 create mode 100644 tools/rasm/decrunch/lz48decrunch_v006b.asm
 create mode 100644 tools/rasm/decrunch/unaplib.asm
 create mode 100644 tools/rasm/decrunch/unaplib_fast.asm
 create mode 100644 tools/rasm/decrunch/unlzsa1_fast.asm
 create mode 100755 tools/rasm/decrunch/unlzsa2_fast.asm

(limited to 'tools/rasm/decrunch')

diff --git a/tools/rasm/decrunch/aplib_z80_todo.asm b/tools/rasm/decrunch/aplib_z80_todo.asm
deleted file mode 100644
index 6843a14..0000000
--- a/tools/rasm/decrunch/aplib_z80_todo.asm
+++ /dev/null
@@ -1,190 +0,0 @@
-;Z80 Version by Dan Weiss
-;Call depack.
-;hl = source
-;de = dest
-
-ap_bits: .db 0
-ap_byte: .db 0
-lwm:	.db 0
-r0:	.dw 0
-
-ap_getbit:
-	push bc
-		ld bc,(ap_bits)
-		rrc c
-		jr nc,ap_getbit_continue
-		ld b,(hl)
-		inc hl
-ap_getbit_continue:
-		ld a,c
-		and b
-		ld (ap_bits),bc
-	pop bc
-	ret
-
-ap_getbitbc: ;doubles BC and adds the read bit
-	sla c
-	rl b
-	call ap_getbit
-	ret z
-	inc bc
-	ret
-
-ap_getgamma:
-	ld bc,1
-ap_getgammaloop:
-	call ap_getbitbc
-	call ap_getbit
-	jr nz,ap_getgammaloop
-	ret
-
-
-depack:
-	;hl = source
-	;de = dest
-	ldi
-	xor a
-	ld (lwm),a
-	inc a
-	ld (ap_bits),a
-	
-aploop:
-	call ap_getbit
-	jp z, apbranch1
-	call ap_getbit
-	jr z, apbranch2
-	call ap_getbit
-	jr z, apbranch3
-	;LWM = 0
-	xor a
-	ld (lwm),a
-	;get an offset
-	ld bc,0
-	call ap_getbitbc
-	call ap_getbitbc
-	call ap_getbitbc
-	call ap_getbitbc
-	ld a,b
-	or c
-	jr nz,apbranch4
-	xor a  ;write a 0
-	ld (de),a
-	inc de
-	jr aploop
-apbranch4:
-	ex de,hl ;write a previous bit (1-15 away from dest)
-	push hl
-		sbc hl,bc
-		ld a,(hl)
-	pop hl
-	ld (hl),a
-	inc hl
-	ex de,hl
-	jr aploop
-apbranch3:
-	;use 7 bit offset, length = 2 or 3
-	;if a zero is encountered here, it's EOF
-	ld c,(hl)
-	inc hl
-	rr c
-	ret z
-	ld b,2
-	jr nc,ap_dont_inc_b
-	inc b
-ap_dont_inc_b:
-	;LWM = 1
-	ld a,1
-	ld (lwm),a
-	
-	push hl
-		ld a,b
-		ld b,0
-		;R0 = c
-		ld (r0),bc
-		ld h,d
-		ld l,e
-		or a
-		sbc hl,bc
-		ld c,a
-		ldir
-	pop hl
-	jr aploop
-apbranch2:
-	;use a gamma code * 256 for offset, another gamma code for length
-	call ap_getgamma
-	dec bc
-	dec bc
-	ld a,(lwm)
-	or a
-	jr nz,ap_not_lwm
-	;bc = 2?
-	ld a,b
-	or c
-	jr nz,ap_not_zero_gamma
-	;if gamma code is 2, use old r0 offset, and a new gamma code for length
-	call ap_getgamma
-	push hl
-		ld h,d
-		ld l,e
-		push bc
-			ld bc,(r0)
-			sbc hl,bc
-		pop bc
-		ldir
-	pop hl
-	jr ap_finishup
-	
-ap_not_zero_gamma:
-	dec bc
-ap_not_lwm:
-	;do I even need this code?
-	;bc=bc*256+(hl), lazy 16bit way
-	ld b,c
-	ld c,(hl)
-	inc hl
-	ld (r0),bc
-	push bc
-		call ap_getgamma
-		ex (sp),hl
-		;bc = len, hl=offs
-		push de
-			ex de,hl
-			;some comparison junk for some reason
-			ld hl,31999
-			or a
-			sbc hl,de
-			jr nc,skip1
-			inc bc
-skip1:
-			ld hl,1279
-			or a
-			sbc hl,de
-			jr nc,skip2
-			inc bc
-skip2:
-			ld hl,127
-			or a
-			sbc hl,de
-			jr c,skip3
-			inc bc
-			inc bc
-skip3:
-			;bc = len, de = offs, hl=junk
-		pop hl
-		push hl
-			or a
-			sbc hl,de
-		pop de
-		;hl=dest-offs, bc=len, de = dest
-		ldir
-	pop hl
-ap_finishup:
-	ld a,1
-	ld (lwm),a
-	jp aploop
-
-apbranch1:
-	ldi
-	xor a
-	ld (lwm),a
-	jp aploop
diff --git a/tools/rasm/decrunch/dzx0_fast.asm b/tools/rasm/decrunch/dzx0_fast.asm
new file mode 100644
index 0000000..55f4388
--- /dev/null
+++ b/tools/rasm/decrunch/dzx0_fast.asm
@@ -0,0 +1,237 @@
+;
+;  Speed-optimized ZX0 decompressor by spke (187 bytes)
+;
+;  ver.00 by spke (27/01-23/03/2021, 191 bytes)
+;  ver.01 by spke (24/03/2021, 193(+2) bytes - fixed a bug in the initialization)
+;  ver.01patch2 by uniabis (25/03/2021, 191(-2) bytes - fixed a bug with elias over 8bits)
+;  ver.01patch9 by uniabis (10/09/2021, 187(-4) bytes - support for new v2 format)
+;
+;  Original ZX0 decompressors were written by Einar Saukas
+;
+;  This decompressor was written on the basis of "Standard" decompressor by
+;  Einar Saukas and optimized for speed by spke. This decompressor is
+;  about 5% faster than the "Turbo" decompressor, which is 128 bytes long.
+;  It has about the same speed as the 412 bytes version of the "Mega" decompressor.
+;
+;  The decompressor uses AF, BC, DE, HL and IX and relies upon self-modified code.
+;
+;  The decompression is done in the standard way:
+;
+;  ld hl,FirstByteOfCompressedData
+;  ld de,FirstByteOfMemoryForDecompressedData
+;  call DecompressZX0
+;
+;  Of course, ZX0 compression algorithms are (c) 2021 Einar Saukas,
+;  see https://github.com/einar-saukas/ZX0 for more information
+;
+;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;
+;  This software is provided 'as-is', without any express or implied
+;  warranty.  In no event will the authors be held liable for any damages
+;  arising from the use of this software.
+;
+;  Permission is granted to anyone to use this software for any purpose,
+;  including commercial applications, and to alter it and redistribute it
+;  freely, subject to the following restrictions:
+;
+;  1. The origin of this software must not be misrepresented; you must not
+;     claim that you wrote the original software. If you use this software
+;     in a product, an acknowledgment in the product documentation would be
+;     appreciated but is not required.
+;  2. Altered source versions must be plainly marked as such, and must not be
+;     misrepresented as being the original software.
+;  3. This notice may not be removed or altered from any source distribution.
+
+macro DecompressZX0
+
+        ld      ix, @CopyMatch1          ; IX = address pushed as a fake return address by the long rep-match path
+        ld      bc, $ffff
+        ld      (@PrevOffset+1), bc      ; default offset is -1 (patches the operand of "ld hl, nn" at @PrevOffset)
+        inc     bc                       ; BC = 0
+        ld      a, $80                   ; bit reservoir holds only the marker bit, so the first bit read forces a reload
+        jr      @RunOfLiterals           ; BC is assumed to contain 0 most of the time
+
+@ShorterOffsets:
+        ld      b, $ff                  ; the top byte of the offset is always $FF
+        ld      c, (hl)
+        inc     hl
+        rr      c                       ; carry is set on every path into here, so bit 7 becomes 1; old bit 0 -> carry
+        ld      (@PrevOffset+1), bc     ; remember the new (negative) offset
+        jr      nc, @LongerMatch        ; carry clear: the length needs further gamma bits
+
+@CopyMatch2:                             ; the case of matches with len=2
+        ld      bc, 2
+
+        ; the faster match copying code
+@CopyMatch1:
+        push    hl                      ; preserve source
+
+@PrevOffset:
+        ld      hl, $ffff               ; restore offset (default offset is -1)
+        add     hl, de                  ; HL = dest - offset
+        ldir                            ; copy BC bytes of the match
+        pop     hl                      ; restore source
+
+        ; after a match you can have either
+        ; 0 + <elias length> = run of literals, or
+        ; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+@AfterMatch1:
+        add     a, a
+        jr      nc, @RunOfLiterals
+
+@UsualMatch:                             ; this is the case of usual match+offset
+        add     a, a
+        jr      nc, @LongerOffets
+        jr      nz, @ShorterOffsets      ; NZ after NC == "confirmed C"
+        
+        ld      a, (hl)                 ; reload bits
+        inc     hl
+        rla                             ; carry (the old marker) enters bit 0, first data bit leaves into carry
+
+        jr      c, @ShorterOffsets
+
+@LongerOffets:
+        ld      c, $fe                  ; seed for the negative offset MSB
+
+        add     a, a                    ; inline read gamma
+        rl      c
+        add     a, a
+        jr      nc, $-4                 ; no stop bit yet: back to the "add a, a" that starts this gamma step
+
+        call    z, @ReloadReadGamma     ; reservoir ran empty on that bit: refill and finish the gamma code
+
+@ProcessOffset:
+
+        inc     c
+        ret     z                       ; end-of-data marker (only checked for longer offsets)
+        rr      c                       ; rotates through carry, which is still set from the gamma stop bit
+        ld      b, c
+        ld      c, (hl)
+        inc     hl
+        rr      c
+        ld      (@PrevOffset+1), bc     ; remember the new offset
+
+        ; lowest bit is the first bit of the gamma code for length
+        jr      c, @CopyMatch2
+
+@LongerMatch:
+        ld      bc, 1
+
+        add     a, a                    ; inline read gamma
+        rl      c
+        add     a, a
+        jr      nc, $-4                 ; no stop bit yet: back to the "add a, a" that starts this gamma step
+
+        call    z,@ReloadReadGamma      ; reservoir ran empty on that bit: refill and finish the gamma code
+
+@CopyMatch3:
+        push    hl                      ; preserve source
+        ld      hl, (@PrevOffset+1)      ; restore offset
+        add     hl, de                  ; HL = dest - offset
+
+        ; because BC>=3-1, we can do 2 x LDI safely
+        ldi
+        ldir
+        inc     c                       ; BC = 0 after ldir, so BC = 1 for the final ldi
+        ldi
+        pop     hl                      ; restore source
+
+        ; after a match you can have either
+        ; 0 + <elias length> = run of literals, or
+        ; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+@AfterMatch3:
+        add     a, a
+        jr      c, @UsualMatch
+
+@RunOfLiterals:
+        inc     c                       ; BC = 1 (BC is 0 on entry)
+        add     a, a
+        jr      nc, @LongerRun
+        jr      nz, @CopyLiteral         ; NZ after NC == "confirmed C"
+        
+        ld      a, (hl)                 ; reload bits
+        inc     hl
+        rla
+
+        jr      c, @CopyLiteral
+
+@LongerRun:
+        add     a, a                    ; inline read gamma
+        rl      c
+        add     a, a
+        jr      nc, $-4                 ; no stop bit yet: back to the "add a, a" that starts this gamma step
+
+        jr      nz, @CopyLiterals
+        
+        ld      a, (hl)                 ; reload bits
+        inc     hl
+        rla
+
+        call    nc, @ReadGammaAligned   ; no stop bit in the fresh byte yet: keep reading the gamma code
+
+@CopyLiterals:
+        ldi                             ; BC >= 2 on this path (at least one gamma data bit was shifted in)
+
+@CopyLiteral:
+        ldir
+
+        ; after a literal run you can have either
+        ; 0 + <elias length> = match using a repeated offset, or
+        ; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+        add     a, a
+        jr      c, @UsualMatch
+
+@RepMatch:
+        inc     c                       ; BC = 1 (BC is 0 after ldir)
+        add     a, a
+        jr      nc, @LongerRepMatch
+        jr      nz, @CopyMatch1          ; NZ after NC == "confirmed C"
+
+        ld      a, (hl)                 ; reload bits
+        inc     hl
+        rla
+
+        jr      c, @CopyMatch1
+
+@LongerRepMatch:
+        add     a, a                    ; inline read gamma
+        rl      c
+        add     a, a
+        jr      nc, $-4                 ; no stop bit yet: back to the "add a, a" that starts this gamma step
+
+        jp      nz, @CopyMatch1
+
+        ; this is a crafty equivalent of CALL ReloadReadGamma : JP CopyMatch1
+        push    ix                      ; IX was loaded with @CopyMatch1 at macro entry
+
+        ;  the subroutine for reading the remainder of the partly read Elias gamma code.
+        ;  it has two entry points: ReloadReadGamma first refills the bit reservoir in A,
+        ;  while ReadGammaAligned assumes that the bit reservoir has just been refilled.
+@ReloadReadGamma:
+        ld      a, (hl)                 ; reload bits
+        inc     hl
+        rla
+
+        ret     c                       ; stop bit found: gamma value is complete
+@ReadGammaAligned:
+        add     a, a
+        rl      c
+        add     a, a
+        ret     c
+        add     a, a
+        rl      c
+        add     a, a
+@ReadingLongGamma:                       ; this loop does not need unrolling, as it does not get much use anyway
+        ret     c
+        add     a, a
+        rl      c
+        rl      b                       ; from here on the value may exceed 8 bits, so carry into B
+        add     a, a
+        jr      nz, @ReadingLongGamma
+
+        ld      a, (hl)                 ; reload bits
+        inc     hl
+        rla
+        jr      @ReadingLongGamma
+mend
+
diff --git a/tools/rasm/decrunch/dzx0_standard.asm b/tools/rasm/decrunch/dzx0_standard.asm
new file mode 100644
index 0000000..6525c8b
--- /dev/null
+++ b/tools/rasm/decrunch/dzx0_standard.asm
@@ -0,0 +1,64 @@
+; -----------------------------------------------------------------------------
+; ZX0 decoder by Einar Saukas & Urusergi
+; "Standard" version (68 bytes only)
+; -----------------------------------------------------------------------------
+; Parameters:
+;   HL: source address (compressed data)
+;   DE: destination address (decompressing)
+; -----------------------------------------------------------------------------
+
+macro dzx0_standard
+        ld      bc, $ffff               ; preserve default offset 1 (kept negated on the stack, i.e. -1)
+        push    bc
+        inc     bc                      ; BC = 0
+        ld      a, $80                  ; bit reservoir holds only the marker bit, so the first read reloads
+@dzx0s_literals:
+        call    @dzx0s_elias             ; obtain length
+        ldir                            ; copy literals
+        add     a, a                    ; copy from last offset or new offset?
+        jr      c, @dzx0s_new_offset
+        call    @dzx0s_elias             ; obtain length
+@dzx0s_copy:
+        ex      (sp), hl                ; preserve source, restore offset
+        push    hl                      ; preserve offset
+        add     hl, de                  ; calculate destination - offset
+        ldir                            ; copy from offset
+        pop     hl                      ; restore offset
+        ex      (sp), hl                ; preserve offset, restore source
+        add     a, a                    ; copy from literals or new offset?
+        jr      nc, @dzx0s_literals
+@dzx0s_new_offset:
+        pop     bc                      ; discard last offset
+        ld      c, $fe                  ; prepare negative offset
+        call    @dzx0s_elias_loop        ; obtain offset MSB
+        inc     c
+        ret     z                       ; check end marker
+        ld      b, c
+        ld      c, (hl)                 ; obtain offset LSB
+        inc     hl
+        rr      b                       ; last offset bit becomes first length bit
+        rr      c
+        push    bc                      ; preserve new offset
+        ld      bc, 1                   ; obtain length
+        call    nc, @dzx0s_elias_backtrack ; carry clear: more length bits follow in the stream
+        inc     bc                      ; length of a new-offset match is the gamma value + 1
+        jr      @dzx0s_copy
+@dzx0s_elias:
+        inc     c                       ; interlaced Elias gamma coding
+@dzx0s_elias_loop:
+        add     a, a
+        jr      nz, @dzx0s_elias_skip    ; A != 0: the bit in carry came from the stream, not the marker
+        ld      a, (hl)                 ; load another group of 8 bits
+        inc     hl
+        rla
+@dzx0s_elias_skip:
+        ret     c                       ; stop bit: BC holds the decoded value
+@dzx0s_elias_backtrack:
+        add     a, a
+        rl      c
+        rl      b
+        jr      @dzx0s_elias_loop
+mend
+; -----------------------------------------------------------------------------
+
+
diff --git a/tools/rasm/decrunch/dzx0_standard_back.asm b/tools/rasm/decrunch/dzx0_standard_back.asm
new file mode 100644
index 0000000..3da94bd
--- /dev/null
+++ b/tools/rasm/decrunch/dzx0_standard_back.asm
@@ -0,0 +1,65 @@
+; -----------------------------------------------------------------------------
+; ZX0 decoder by Einar Saukas
+; "Standard" version (69 bytes only) - BACKWARDS VARIANT
+; -----------------------------------------------------------------------------
+; Parameters:
+;   HL: last source address (compressed data)
+;   DE: last destination address (decompressing)
+; -----------------------------------------------------------------------------
+
+Macro dzx0_standard_back
+        ld      bc, 1                   ; preserve default offset 1
+        push    bc
+        ld      a, $80                  ; bit reservoir holds only the marker bit, so the first read reloads
+@dzx0sb_literals:
+        call    @dzx0sb_elias            ; obtain length
+        lddr                            ; copy literals
+        inc     c                       ; BC = 0 after lddr, so BC = 1 seeds the next Elias value
+        add     a, a                    ; copy from last offset or new offset?
+        jr      c, @dzx0sb_new_offset
+        call    @dzx0sb_elias            ; obtain length
+@dzx0sb_copy:
+        ex      (sp), hl                ; preserve source, restore offset
+        push    hl                      ; preserve offset
+        add     hl, de                  ; calculate destination + offset (offset is stored positive in this variant)
+        lddr                            ; copy from offset
+        inc     c                       ; BC = 0 after lddr, so BC = 1 seeds the next Elias value
+        pop     hl                      ; restore offset
+        ex      (sp), hl                ; preserve offset, restore source
+        add     a, a                    ; copy from literals or new offset?
+        jr      nc, @dzx0sb_literals
+@dzx0sb_new_offset:
+        inc     sp                      ; discard last offset
+        inc     sp
+        call    @dzx0sb_elias            ; obtain offset MSB
+        dec     b
+        ret     z                       ; check end marker
+        dec     c                       ; adjust for positive offset
+        ld      b, c
+        ld      c, (hl)                 ; obtain offset LSB
+        dec     hl
+        srl     b                       ; last offset bit becomes first length bit
+        rr      c
+        inc     bc
+        push    bc                      ; preserve new offset
+        ld      bc, 1                   ; obtain length
+        call    c, @dzx0sb_elias_backtrack ; carry set: more length bits follow in the stream
+        inc     bc                      ; length of a new-offset match is the gamma value + 1
+        jr      @dzx0sb_copy
+@dzx0sb_elias_backtrack:
+        add     a, a
+        rl      c
+        rl      b
+@dzx0sb_elias:
+        add     a, a                    ; inverted interlaced Elias gamma coding
+        jr      nz, @dzx0sb_elias_skip   ; A != 0: the bit in carry came from the stream, not the marker
+        ld      a, (hl)                 ; load another group of 8 bits
+        dec     hl
+        rla
+@dzx0sb_elias_skip:
+        jr      c, @dzx0sb_elias_backtrack ; inverted flag: 1 = another data bit follows, 0 = done
+        ret
+mend
+; -----------------------------------------------------------------------------
+
+
diff --git a/tools/rasm/decrunch/dzx0_turbo_back.asm b/tools/rasm/decrunch/dzx0_turbo_back.asm
new file mode 100644
index 0000000..d009e92
--- /dev/null
+++ b/tools/rasm/decrunch/dzx0_turbo_back.asm
@@ -0,0 +1,101 @@
+; -----------------------------------------------------------------------------
+; ZX0 decoder by Einar Saukas & introspec
+; "Turbo" version (126 bytes, 21% faster) - BACKWARDS VARIANT
+; -----------------------------------------------------------------------------
+; Parameters:
+;   HL: last source address (compressed data)
+;   DE: last destination address (decompressing)
+; -----------------------------------------------------------------------------
+
+macro dzx0_turbo_back
+        ld      bc, 1                   ; preserve default offset 1
+        ld      (@dzx0tb_last_offset+1), bc ; patches the operand of "ld hl, nn" at @dzx0tb_last_offset
+        ld      a, $80                  ; bit reservoir holds only the marker bit, so the first read reloads
+        jr      @dzx0tb_literals
+@dzx0tb_new_offset:
+        add     a, a                    ; obtain offset MSB
+        call    c, @dzx0tb_elias
+        dec     b
+        ret     z                       ; check end marker
+        dec     c                       ; adjust for positive offset
+        ld      b, c
+        ld      c, (hl)                 ; obtain offset LSB
+        dec     hl
+        srl     b                       ; last offset bit becomes first length bit
+        rr      c
+        inc     bc
+        ld      (@dzx0tb_last_offset+1), bc ; preserve new offset
+        ld      bc, 1                   ; obtain length
+        call    c, @dzx0tb_elias_loop
+        inc     bc                      ; length of a new-offset match is the gamma value + 1
+@dzx0tb_copy:
+        push    hl                      ; preserve source
+@dzx0tb_last_offset:
+        ld      hl, 0                   ; restore offset (operand is overwritten at run time, see stores above)
+        add     hl, de                  ; calculate destination + offset (offset is stored positive in this variant)
+        lddr                            ; copy from offset
+        inc     c                       ; BC = 0 after lddr, so BC = 1 seeds the next Elias value
+        pop     hl                      ; restore source
+        add     a, a                    ; copy from literals or new offset?
+        jr      c, @dzx0tb_new_offset
+@dzx0tb_literals:
+        add     a, a                    ; obtain length
+        call    c, @dzx0tb_elias
+        lddr                            ; copy literals
+        inc     c                       ; BC = 0 after lddr, so BC = 1 seeds the next Elias value
+        add     a, a                    ; copy from last offset or new offset?
+        jr      c, @dzx0tb_new_offset
+        add     a, a                    ; obtain length
+        call    c, @dzx0tb_elias
+        jp      @dzx0tb_copy
+@dzx0tb_elias_loop:
+        add     a, a
+        rl      c
+        add     a, a
+        ret     nc                      ; inverted flag: 0 = gamma value complete
+@dzx0tb_elias:
+        jp      nz, @dzx0tb_elias_loop   ; inverted interlaced Elias gamma coding
+        ld      a, (hl)                 ; load another group of 8 bits
+        dec     hl
+        rla
+        ret     nc
+        add     a, a
+        rl      c
+        add     a, a
+        ret     nc
+        add     a, a
+        rl      c
+        add     a, a
+        ret     nc
+        add     a, a
+        rl      c
+        add     a, a
+        ret     nc
+@dzx0tb_elias_reload:
+        add     a, a
+        rl      c
+        rl      b                       ; from here on the value may exceed 8 bits, so carry into B
+        add     a, a                    ; unrolled code is byte-aligned here: this shifts the marker bit out into carry
+        ld      a, (hl)                 ; load another group of 8 bits
+        dec     hl
+        rla
+        ret     nc
+        add     a, a
+        rl      c
+        rl      b
+        add     a, a
+        ret     nc
+        add     a, a
+        rl      c
+        rl      b
+        add     a, a
+        ret     nc
+        add     a, a
+        rl      c
+        rl      b
+        add     a, a
+        jr      c, @dzx0tb_elias_reload
+        ret
+; -----------------------------------------------------------------------------
+mend
+
diff --git a/tools/rasm/decrunch/dzx7_turbo.asm b/tools/rasm/decrunch/dzx7_turbo.asm
index 779ced5..cb66be5 100644
--- a/tools/rasm/decrunch/dzx7_turbo.asm
+++ b/tools/rasm/decrunch/dzx7_turbo.asm
@@ -8,7 +8,7 @@
 ; -----------------------------------------------------------------------------
 
 dzx7_turbo:
-        ld      a, $80
+        ld      a, #80
 dzx7t_copy_byte_loop:
         ldi                             ; copy literal byte
 dzx7t_main_loop:
@@ -42,7 +42,7 @@ dzx7t_len_value_start:
 ; determine offset
         ld      e, (hl)                 ; load offset flag (1 bit) + offset value (7 bits)
         inc     hl
-        defb    $cb, $33                ; opcode for undocumented instruction "SLL E" aka "SLS E"
+        defb    #cb, #33                ; opcode for undocumented instruction "SLL E" aka "SLS E"
         jr      nc, dzx7t_offset_end    ; if offset flag is set, load 4 extra bits
         add     a, a                    ; check next bit
         call    z, dzx7t_load_bits      ; no more bits left?
diff --git a/tools/rasm/decrunch/exomizer3megachur.asm b/tools/rasm/decrunch/exomizer3megachur.asm
deleted file mode 100644
index ea1973e..0000000
--- a/tools/rasm/decrunch/exomizer3megachur.asm
+++ /dev/null
@@ -1,210 +0,0 @@
-;Exomizer 2 Z80 decoder
-;Copyright (C) 2008-2016 by Jaime Tejedor Gomez (Metalbrain)
-;
-;Optimized by Antonio Villena and Urusergi (169 bytes)
-;
-;Compression algorithm by Magnus Lind
-;
-;  This depacker is free software; you can redistribute it and/or
-;  modify it under the terms of the GNU Lesser General Public
-;  License as published by the Free Software Foundation; either
-;  version 2.1 of the License, or (at your option) any later version.
-;
-;  This library is distributed in the hope that it will be useful,
-;  but WITHOUT ANY WARRANTY; without even the implied warranty of
-;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;  Lesser General Public License for more details.
-;
-;  You should have received a copy of the GNU Lesser General Public
-;  License along with this library; if not, write to the Free Software
-;  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;
-;
-;input- hl=compressed data start
-;  de=uncompressed destination start
-;
-;  you may change exo_mapbasebits to point to any free buffer
-;
-;ATTENTION!
-;A huge speed boost (around 14%) can be gained at the cost of only 5 bytes.
-;If you want this, replace all instances of "call exo_getbit" with "srl a" followed by
-;"call z,exo_getbit", and remove the first two instructions in exo_getbit routine.
-; ---------------------------
-; modified by Megachur in 2018
-; ---------------------------
-; hl -> compressed data start
-; de -> uncompressed destination start
-; ---------------------------
-
-;EXO_BACKWARD equ 1
-ENABLE_MEXO_GETBIT	equ 1
-
-list:EXOMIZER_ADDRESS:nolist
-; ---------------------------
-MACRO MEXO_GETBIT
-	srl a
-	jr nz,@1
-	ld a,(hl)
-	IFDEF EXO_BACKWARD
-	dec hl
-	ELSE
-	inc hl
-	ENDIF
-	rra
-@1
-ENDM
-
-deexo:
-	ld iy,exo_mapbasebits+11
-	ld a,(hl)
-
-	IFDEF EXO_BACKWARD
-	dec hl
-	ELSE 
-	inc hl
-	ENDIF
-
-	ld b,52
-	push de
-	cp a
-
-exo_initbits:
-	ld c,16
-	jr nz,exo_get4bits
-	ld ixl,c
-	ld de,1  ;DE=b2
-
-exo_get4bits:
-	IFDEF ENABLE_MEXO_GETBIT
-	MEXO_GETBIT
-	ELSE
-	srl a:call z,exo_getbit	;call exo_getbit ;get one bit
-	ENDIF
-	rl c
-	jr nc,exo_get4bits
-	inc c
-	push hl
-	ld hl,1
-	ld (iy+41),c ;bits[i]=b1 (and opcode 41 == add hl,hl)
-
-exo_setbit:
-	dec c
-	jr nz,exo_setbit-1 ;jump to add hl,hl instruction
-	ld (iy-11),e
-	ld (iy+93),d ;base[i]=b2
-	add hl,de
-	ex de,hl
-	inc iy
-	pop hl
-	dec ixl
-	djnz exo_initbits
-	pop de
-	jr exo_mainloop
-
-exo_literalrun:
-	ld e,c  ;DE=1
-
-exo_getbits:
-	dec b
-	ret z
-
-exo_getbits1:
-	IFDEF ENABLE_MEXO_GETBIT
-	MEXO_GETBIT
-	ELSE
-	srl a:call z,exo_getbit	;call exo_getbit
-	ENDIF
-	rl e
-	rl d
-	jr nc,exo_getbits
-	ld b,d
-	ld c,e
-	pop de
-
-exo_literalcopy:
-	IFDEF EXO_BACKWARD
-	lddr
-	ELSE
-	ldir
-	ENDIF
-exo_mainloop:
-	inc c
-	IFDEF ENABLE_MEXO_GETBIT
-	MEXO_GETBIT
-	ELSE
-	srl a:call z,exo_getbit	;call exo_getbit ;literal?
-	ENDIF
-	jr c,exo_literalcopy
-	ld c,239
-exo_getindex:
-	IFDEF ENABLE_MEXO_GETBIT
-	MEXO_GETBIT
-	ELSE
-	srl a:call z,exo_getbit	;call exo_getbit
-	ENDIF
-	inc c
-	jr nc,exo_getindex
-	ret z
-	push de
-	ld d,b
-	jp p,exo_literalrun
-	ld iy,exo_mapbasebits-229
-	call exo_getpair
-	push de
-	rlc d
-	jr nz,exo_dontgo
-	dec e
-	ld bc,512+32 ;2 bits,48 offset
-	jr z,exo_goforit
-	dec e ;2?
-exo_dontgo:
-	ld bc,1024+16 ;4 bits,32 offset
-	jr z,exo_goforit
-	ld de,0
-	ld c,d  ;16 offset
-exo_goforit:
-	call exo_getbits1
-	ld iy,exo_mapbasebits+27
-	add iy,de
-	call exo_getpair
-	pop bc
-	ex (sp),hl
-	IFDEF EXO_BACKWARD
-	ex de,hl
-	add hl,de
-	lddr
-	ELSE
-	push hl
-	sbc hl,de
-	pop de
-	ldir
-	ENDIF
-	pop hl
-	jr exo_mainloop ;Next!
-exo_getpair:
-	add iy,bc
-	ld e,d
-	ld b,(iy+41)
-	call exo_getbits
-	ex de,hl
-	ld c,(iy-11)
-	ld b,(iy+93)
-	add hl,bc ;Always clear C flag
-	ex de,hl
-	ret
-
-	IFDEF ENABLE_MEXO_GETBIT
-	ELSE
-exo_getbit: 
-;	srl a
-;	ret nz
-	ld a,(hl)
-	inc hl
-	rra
-	ret
-	ENDIF
-
-exo_mapbasebits:
-	ds 156,#00   ;tables for bits,baseL,baseH
-; ---------------------------
-list:EXOMIZER_ADDRESS_LENGTH equ $-EXOMIZER_ADDRESS:nolist
\ No newline at end of file
diff --git a/tools/rasm/decrunch/lz48decrunch_v006.asm b/tools/rasm/decrunch/lz48decrunch_v006.asm
deleted file mode 100644
index 750b571..0000000
--- a/tools/rasm/decrunch/lz48decrunch_v006.asm
+++ /dev/null
@@ -1,113 +0,0 @@
-;
-; LZ48 decrunch
-;
-; hl  compressed data adress
-; de  output adress of data
-;
-
-
-org #8000
-
-; CALL #8000,source,destination
-di
-
-; parameters
-ld h,(ix+3)
-ld l,(ix+2)
-ld d,(ix+1)
-ld e,(ix+0)
-
-call LZ48_decrunch
-
-ei
-ret
-
-
-
-
-
-LZ48_decrunch
-ldi
-ld b,0
-
-nextsequence
-ld a,(hl)
-inc hl
-ld lx,a
-and #F0
-jr z,lzunpack ; no litteral bytes
-rrca
-rrca
-rrca
-rrca
-
-ld c,a
-cp 15 ; more bytes for length?
-jr nz,copyliteral
-
-getadditionallength
-ld a,(hl)
-inc hl
-inc a
-jr nz,lengthnext
-inc b
-dec bc
-jr getadditionallength
-lengthnext
-dec a
-add a,c
-ld c,a
-ld a,b
-adc a,0
-ld b,a ; bc=length
-
-copyliteral
-ldir
-
-lzunpack
-ld a,lx
-and #F
-add 3
-ld c,a
-cp 18 ; more bytes for length?
-jr nz,readoffset
-
-getadditionallengthbis
-ld a,(hl)
-inc hl
-inc a
-jr nz,lengthnextbis
-inc b
-dec bc
-jr getadditionallengthbis
-lengthnextbis
-dec a
-add a,c
-ld c,a
-ld a,b
-adc a,0
-ld b,a ; bc=length
-
-readoffset
-; read encoded offset
-ld a,(hl)
-inc a
-ret z ; LZ48 end with zero offset
-inc hl
-push hl
-ld l,a
-ld a,e
-sub l
-ld l,a
-ld a,d
-sbc a,0
-ld h,a
-; source=dest-copyoffset
-
-copykey
-ldir
-
-pop hl
-jr nextsequence
-
-
diff --git a/tools/rasm/decrunch/lz48decrunch_v006b.asm b/tools/rasm/decrunch/lz48decrunch_v006b.asm
new file mode 100644
index 0000000..69162a4
--- /dev/null
+++ b/tools/rasm/decrunch/lz48decrunch_v006b.asm
@@ -0,0 +1,78 @@
+;
+; LZ48 decrunch
+;
+
+; In	; HL=compressed data address
+; 	; DE=output data address
+; Out	; HL    last address of compressed data read (you must inc once for LZ48 stream)
+;	; DE    address just past the last decrunched byte written
+;	; BC    always 3
+;	; A     always zero
+;	; IXL   undetermined
+;	; flags (inc a -> 0)
+; Modif	; AF, BC, DE, HL, IXL
+LZ48_decrunch ; entry point: HL = compressed data, DE = output
+	ldi ; copy the first stream byte to the output unchanged
+	ld b,0 ; B = high byte of the running length, starts at 0
+
+nextsequence
+	ld a,(hl) ; token byte: high nibble = literal count, low nibble = match length - 3
+	inc hl
+	cp #10
+	jr c,lzunpack ; no literal bytes (A already equals the low nibble)
+	ld ixl,a ; keep the token so the low nibble can be recovered after the literal copy
+	and #f0
+	rrca
+	rrca
+	rrca
+	rrca
+
+	cp 15 ; more bytes for literal length?
+	jr nz,copyliteral
+getadditionallength
+	ld c,(hl) ; get additional literal length byte
+	inc hl
+	add a,c ; compute literal length total
+	jr nc,lengthNC
+	inc b ; carry out of the low byte goes into the high byte of the length
+lengthNC
+	inc c ; C wraps to 0 only when the byte just read was 255
+	jr z,getadditionallength ; if last literal length byte was 255, we have more bytes to process
+copyliteral
+	ld c,a ; BC = literal length
+	ldir ; copy literals; leaves BC = 0
+	ld a,ixl
+	and #F ; A = match length nibble of the token
+lzunpack
+	add 3 ; minimum match length is 3
+	cp 18 ; more bytes for match length?
+	jr nz,readoffset
+getadditionallengthbis
+	ld c,(hl) ; get additional match length byte
+	inc hl
+	add a,c ; compute match length size total
+	jr nc,lengthNCbis
+	inc b ; carry out of the low byte goes into the high byte of the length
+lengthNCbis
+	inc c ; C wraps to 0 only when the byte just read was 255
+	jr z,getadditionallengthbis ; if last match length byte was 255, we have more bytes to process
+
+readoffset
+	ld c,a ; BC = match length
+; read encoded offset
+	ld a,(hl)
+	inc a
+	ret z ; end of stream: offset byte was #FF, so A wrapped to 0 (HL is not advanced past it)
+	inc hl
+	push hl ; preserve the compressed-data pointer during the match copy
+; source=dest-copyoffset
+	; A != 0 here
+	neg
+	ld l,a
+	ld h,#ff ; HL = -A as a 16-bit value (A is 1..255 before neg)
+	add hl,de
+copykey
+	ldir ; copy the match; leaves BC = 0, so B is 0 again for the next sequence
+
+	pop hl ; restore the compressed-data pointer
+	jr nextsequence
diff --git a/tools/rasm/decrunch/unaplib.asm b/tools/rasm/decrunch/unaplib.asm
new file mode 100644
index 0000000..13c8678
--- /dev/null
+++ b/tools/rasm/decrunch/unaplib.asm
@@ -0,0 +1,190 @@
+;Z80 Version by Dan Weiss
+;Call depack.
+;hl = source
+;de = dest
+
+ap_bits: db 0	; bit mask selecting the current tag bit; must stay directly before ap_byte (ap_getbit accesses both as one 16-bit word)
+ap_byte: db 0	; tag byte currently being consumed
+lwm:	db 0	; "last was match" flag: written as 0 after literals/single-byte copies, 1 after matches
+r0:	dw 0	; most recently used match offset, reused by the repeat-offset case in apbranch2
+
+ap_getbit:	; read the next tag bit: returns NZ (A != 0) for 1, Z for 0; BC is preserved, HL advances when a new tag byte is fetched
+	push bc
+		ld bc,(ap_bits)	; C = ap_bits (mask), B = ap_byte (the two bytes are adjacent in memory)
+		rrc c	; move the mask to the next lower bit; carry is set when it wraps from bit 0 to bit 7
+		jr nc,ap_getbit_continue
+		ld b,(hl)	; mask wrapped: fetch a fresh tag byte from the compressed stream
+		inc hl
+ap_getbit_continue:
+		ld a,c
+		and b	; isolate the selected bit; AND also leaves carry clear, which some callers rely on before SBC
+		ld (ap_bits),bc	; store mask and tag byte back
+	pop bc
+	ret
+
+ap_getbitbc: ;doubles BC and adds the read bit
+	sla c
+	rl b	; BC = BC * 2 (16-bit shift left)
+	call ap_getbit
+	ret z	; bit was 0: nothing to add
+	inc bc	; bit was 1: set the new low bit
+	ret
+
+ap_getgamma:	; read a gamma-coded number into BC (result is at least 2); data bits alternate with continuation bits
+	ld bc,1	; implicit leading 1 bit
+ap_getgammaloop:
+	call ap_getbitbc	; append one data bit to BC
+	call ap_getbit	; continuation bit: 1 = another data bit follows
+	jr nz,ap_getgammaloop
+	ret
+
+
+depack:
+	;hl = source
+	;de = dest
+	ldi	; the first byte is copied verbatim
+	xor a
+	ld (lwm),a
+	inc a
+	ld (ap_bits),a	; mask = 1, so the first ap_getbit call wraps the mask and fetches a tag byte
+	
+aploop:
+	call ap_getbit
+	jp z, apbranch1	; "0": literal byte
+	call ap_getbit
+	jr z, apbranch2	; "10": match with gamma-coded offset and length
+	call ap_getbit
+	jr z, apbranch3	; "110": 7-bit offset, length 2 or 3 (or end of stream)
+	;LWM = 0
+	xor a
+	ld (lwm),a
+	;get an offset
+	ld bc,0
+	call ap_getbitbc
+	call ap_getbitbc
+	call ap_getbitbc
+	call ap_getbitbc
+	ld a,b
+	or c	; test the 4-bit offset for zero (OR also clears carry for the SBC in apbranch4)
+	jr nz,apbranch4
+	xor a  ;write a 0
+	ld (de),a
+	inc de
+	jr aploop
+apbranch4:
+	ex de,hl ;write a previous byte (1-15 away from dest)
+	push hl
+		sbc hl,bc	; HL = dest - offset (carry is clear after the OR above)
+		ld a,(hl)
+	pop hl
+	ld (hl),a
+	inc hl
+	ex de,hl
+	jr aploop
+apbranch3:
+	;use 7 bit offset, length = 2 or 3
+	;if a zero is encountered here, it's EOF
+	ld c,(hl)
+	inc hl
+	rr c	; C = offset (upper 7 bits), carry = length bit; incoming carry is clear after ap_getbit's AND
+	ret z	; shifted offset is zero: end of stream
+	ld b,2
+	jr nc,ap_dont_inc_b
+	inc b	; length bit set: copy 3 bytes instead of 2
+ap_dont_inc_b:
+	;LWM = 1
+	ld a,1
+	ld (lwm),a
+	
+	push hl
+		ld a,b	; A = length (2 or 3)
+		ld b,0
+		;R0 = c
+		ld (r0),bc
+		ld h,d
+		ld l,e
+		or a
+		sbc hl,bc	; HL = dest - offset
+		ld c,a	; BC = length (B is 0)
+		ldir
+	pop hl
+	jr aploop
+apbranch2:
+	;use a gamma code * 256 for offset, another gamma code for length
+	call ap_getgamma
+	dec bc
+	dec bc	; BC = gamma - 2
+	ld a,(lwm)
+	or a
+	jr nz,ap_not_lwm	; LWM = 1: the offset high byte is gamma - 2
+	;bc = 2?
+	ld a,b
+	or c
+	jr nz,ap_not_zero_gamma
+	;if gamma code is 2, use old r0 offset, and a new gamma code for length
+	call ap_getgamma
+	push hl
+		ld h,d
+		ld l,e
+		push bc
+			ld bc,(r0)
+			sbc hl,bc	; HL = dest - r0 (carry is clear after the final AND in ap_getgamma's last ap_getbit)
+		pop bc
+		ldir
+	pop hl
+	jr ap_finishup
+	
+ap_not_zero_gamma:
+	dec bc	; LWM = 0 and gamma != 2: the offset high byte is gamma - 3
+ap_not_lwm:
+	;do I even need this code?
+	;bc=bc*256+(hl), lazy 16bit way
+	ld b,c
+	ld c,(hl)
+	inc hl
+	ld (r0),bc	; remember the offset for a later repeat-offset match
+	push bc
+		call ap_getgamma
+		ex (sp),hl
+		;bc = len, hl=offs
+		push de
+			ex de,hl
+			;length adjustment by offset: +1 if offs >= 32000, +1 if offs >= 1280, +2 if offs < 128
+			ld hl,31999
+			or a
+			sbc hl,de
+			jr nc,skip1
+			inc bc
+skip1:
+			ld hl,1279
+			or a
+			sbc hl,de
+			jr nc,skip2
+			inc bc
+skip2:
+			ld hl,127
+			or a
+			sbc hl,de
+			jr c,skip3
+			inc bc
+			inc bc
+skip3:
+			;bc = len, de = offs, hl=junk
+		pop hl
+		push hl
+			or a
+			sbc hl,de
+		pop de
+		;hl=dest-offs, bc=len, de = dest
+		ldir
+	pop hl
+ap_finishup:
+	ld a,1
+	ld (lwm),a
+	jp aploop
+
+apbranch1:
+	ldi	; copy one literal byte
+	xor a
+	ld (lwm),a
+	jp aploop
diff --git a/tools/rasm/decrunch/unaplib_fast.asm b/tools/rasm/decrunch/unaplib_fast.asm
new file mode 100644
index 0000000..47ad16b
--- /dev/null
+++ b/tools/rasm/decrunch/unaplib_fast.asm
@@ -0,0 +1,266 @@
+;
+;  Speed-optimized ApLib decompressor by spke (ver.04 spring 2020, 236 bytes)
+;
+;  The original Z80 decompressors for ApLib were written by Dan Weiss (Dwedit),
+;  then tweaked by Francisco Javier Pena Pareja (utopian),
+;  and optimized by Jaime Tejedor Gomez (Metalbrain) and Antonio Villena.
+;
+;  This is a new "implicit state" decompressor heavily optimized for speed by spke.
+;  (It is 11 bytes shorter and 14% faster than the previously fastest
+;  247b decompressor by Metalbrain and Antonio Villena.)
+;
+;  ver.00 by spke (21/08/2018-01/09/2018, 244 bytes, an edit of the existing 247b decompressor);
+;  ver.01 by spke (12-13/11/2018, 234(-10) bytes, +3% speed using the state machine for LWM);
+;  ver.02 by spke (06/08/2019, +1% speed);
+;  ver.03 by spke (27/08/2019, 236(+2) bytes, +1% speed using partly expanded LDIR);
+;  ver.04 by spke (spring 2020, added full revision history and support for long offsets)
+;
+;  The data must be compressed using any compressor for ApLib capable of generating raw data.
+;  At present, two best available compressors are:
+;
+;  "APC" by Sven-Ake Dahl: https://github.com/svendahl/cap or
+;  "apultra" by Emmanuel Marty: https://github.com/emmanuel-marty/apultra
+;
+;  The compression can be done as follows:
+;
+;  apc.exe e <sourcefile> <outfile>
+;  or
+;  apultra.exe <sourcefile> <outfile>
+;
+;  A decent compressor was written by r57shell (although it is worse than compressors above):
+;  http://gendev.spritesmind.net/forum/viewtopic.php?p=32548#p32548
+;  The use of the official ApLib compressor by Joergen Ibsen is not recommended.
+;
+;  The decompression is done in the standard way:
+;
+;  ld hl,FirstByteOfCompressedData
+;  ld de,FirstByteOfMemoryForDecompressedData
+;  call DecompressApLib
+;
+;  The decompressor modifies AF, AF', BC, DE, HL, IXH, IY.
+;  (However, note that the option "AllowSelfmodifyingCode" removes the dependency on IY.)
+;
+;  Of course, ApLib compression algorithms are (c) 1998-2014 Joergen Ibsen,
+;  see http://www.ibsensoftware.com/ for more information
+;
+;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;
+;  This software is provided 'as-is', without any express or implied
+;  warranty.  In no event will the authors be held liable for any damages
+;  arising from the use of this software.
+;
+;  Permission is granted to anyone to use this software for any purpose,
+;  including commercial applications, and to alter it and redistribute it
+;  freely, subject to the following restrictions:
+;
+;  1. The origin of this software must not be misrepresented; you must not
+;     claim that you wrote the original software. If you use this software
+;     in a product, an acknowledgment in the product documentation would be
+;     appreciated but is not required.
+;  2. Altered source versions must be plainly marked as such, and must not be
+;     misrepresented as being the original software.
+;  3. This notice may not be removed or altered from any source distribution.
+
+;	DEFINE SupportLongOffsets				; +4 bytes for long offset support. slows decompression down by 1%, but may be needed to decompress files >=32K
+
+MACRO ApUnpack
+;  In: HL = compressed data, DE = destination. The macro expands inline and leaves through RET at the EOF marker.
+		ld a,128 : jr @LWM0_CASE0	; A = bit buffer holding only the end marker, so the first shift in the main loop triggers a reload
+
+;==================================================================================================================
+;==================================================================================================================
+;==================================================================================================================
+
+@LWM0:			;LWM = 0 (LWM stands for "Last Was Match"; a flag that we did not have a match)
+
+@LWM0_ReloadByteC0		ld a,(hl) : inc hl : rla
+			jr c,@LWM0_Check2ndBit
+
+;
+;  case "0"+BYTE: copy a single literal
+
+@LWM0_CASE0:			ldi						; first byte is always copied as literal
+
+;
+;  main decompressor loop
+
+@LWM0_MainLoop:		add a : jr z,@LWM0_ReloadByteC0 : jr nc,@LWM0_CASE0	; "0"+BYTE = copy literal
+@LWM0_Check2ndBit		add a : call z,@ReloadByte : jr nc,@LWM0_CASE10	; "10"+gamma(offset/256)+BYTE+gamma(length) = the main matching mechanism
+			add a : call z,@ReloadByte : jp c,@LWM1_CASE111	; "110"+[oooooool] = matched 2-3 bytes with a small offset
+
+;
+;  branch "110"+[oooooool]: copy two or three bytes (bit "l") with the offset -1..-127 (bits "ooooooo"), or stop
+
+@LWM0_CASE110:		; "use 7 bit offset, length = 2 or 3"
+			; "if a zero is found here, it's EOF"
+			ld c,(hl) : rr c : ret z			; process EOF
+			inc hl
+			ld b,0
+
+			ld iyl,c : ld iyh,b				; save offset for future LWMs
+
+			push hl						; save src
+			ld h,d : ld l,e					; HL = dest
+			jr c,@LWM0_LengthIs3
+
+@LWM0_LengthIs2		sbc hl,bc				; carry is clear here because the JR C above was not taken
+			ldi : ldi
+			jr @LWM0_PreMainLoop
+
+@LWM0_LengthIs3		or a : sbc hl,bc
+			ldi : ldi : ldi
+			jr @LWM0_PreMainLoop
+
+;
+;  branch "10"+gamma(offset/256)+BYTE+gamma(length): the main matching mechanism
+
+@LWM0_CASE10:		; "use a gamma code * 256 for offset, another gamma code for length"
+			call @GetGammaCoded
+
+			; the original decompressor contains
+			;
+			; if ((LWM == 0) && (offs == 2)) { ... }
+			; else {
+			;	if (LWM == 0) { offs -= 3; }
+			;	else { offs -= 2; }
+			; }
+			;
+			; so, the idea here is to use the fact that GetGammaCoded returns (offset/256)+2,
+			; and to split the first condition by noticing that C-1 can never be zero
+			dec c : dec c : jr z,@LWM1_KickInLWM
+
+@LWM0_AfterLWM		dec c : ld b,c : ld c,(hl) : inc hl	; BC = offset
+
+			ld iyl,c : ld iyh,b : push bc
+
+			call @GetGammaCoded			; BC = len*
+
+			ex (sp),hl				; HL = offset, (SP) = src
+
+			; interpretation of length value is offset-dependent:
+			; if (offs >= 32000) len++; if (offs >= 1280) len++; if (offs < 128) len+=2;
+			; in other words,
+			; (1 <= offs < 128) +=2
+			; (128 <= offs < 1280) +=0
+			; (1280 <= offs < 31999) +=1
+			; NB offsets over 32000 need one more check, but other Z80 decompressors seem to ignore it. is it not needed?
+
+			; interpretation of length value is offset-dependent
+			exa : ld a,h
+	IFDEF	SupportLongOffsets
+			; NB offsets over 32000 require an additional check, which is skipped in most
+			; Z80 decompressors (seemingly as a performance optimization)
+			cp 32000>>8 : jr nc,@LWM0_Add2
+	ENDIF
+			cp 5 : jr nc,@LWM0_Add1
+			or a : jr nz,@LWM0_Add0
+			bit 7,l : jr nz,@LWM0_Add0
+@LWM0_Add2			inc bc
+@LWM0_Add1			inc bc
+@LWM0_Add0			; for offs<128 : 4+4+7+7 + 4+7 + 8+7 + 6+6 = 60t
+			; for offs>=1280 : 4+4+7+12 + 6 = 33t
+			; for 128<=offs<1280 : 4+4+7+7 + 4+12 = 38t OR 4+4+7+7 + 4+7+8+12 = 53t
+;			dec bc
+
+@LWM0_CopyMatch:		; this assumes that BC = len, DE = dest, HL = offset
+			; and also that (SP) = src, while having NC
+			ld a,e : sub l : ld l,a			; low byte of DE - HL
+			ld a,d : sbc h				; high byte of DE - HL, i.e. of dest - offset
+@LWM0_CopyMatchLDH		ld h,a : ldi : ldir : exa
+@LWM0_PreMainLoop		pop hl					; recover src
+
+;==================================================================================================================
+;==================================================================================================================
+;==================================================================================================================
+
+@LWM1:			; LWM = 1
+
+;
+;  main decompressor loop
+
+@LWM1_MainLoop:		add a : jr z,@LWM1_ReloadByteC0 : jr nc,@LWM0_CASE0		; "0"+BYTE = copy literal
+@LWM1_Check2ndBit		add a : call z,@ReloadByte : jr nc,@LWM1_CASE10		; "10"+gamma(offset/256)+BYTE+gamma(length) = the main matching mechanism
+			add a : call z,@ReloadByte : jr nc,@LWM0_CASE110		; "110"+[oooooool] = matched 2-3 bytes with a small offset
+
+;
+;  case "111"+"oooo": copy a byte with offset -1..-15, or write zero to dest
+
+@LWM1_CASE111:		ld bc,%11100000
+			add a : call z,@ReloadByte : rl c		; read short offset (4 bits)
+			add a : call z,@ReloadByte : rl c		; read short offset (4 bits)
+			add a : call z,@ReloadByte : rl c		; read short offset (4 bits)
+			add a : call z,@ReloadByte : rl c		; read short offset (4 bits)
+			ex de,hl : jr z,@LWM1_WriteZero		; zero offset means "write zero" (NB: B is zero here)
+
+			; "write a previous byte (1-15 away from dest)"
+			push hl					; BC = offset, DE = src, HL = dest
+			sbc hl,bc				; HL = dest-offset (SBC works because branching above ensured NC)
+			ld b,(hl)
+			pop hl
+
+@LWM1_WriteZero		ld (hl),b : ex de,hl
+			inc de : jp @LWM0_MainLoop				; 10+4*(4+10+8)+4+7 + 11+15+7+10 + 7+4+6+10 = 179t
+
+@LWM1_ReloadByteC0		ld a,(hl) : inc hl : rla
+			jp nc,@LWM0_CASE0
+			jr @LWM1_Check2ndBit
+
+;
+;  branch "10"+gamma(offset/256)+BYTE+gamma(length): the main matching mechanism
+
+@LWM1_CASE10:		; "use a gamma code * 256 for offset, another gamma code for length"
+			call @GetGammaCoded
+
+			; the original decompressor contains
+			;
+			; if ((LWM == 0) && (offs == 2)) { ... }
+			; else {
+			;	if (LWM == 0) { offs -= 3; }
+			;	else { offs -= 2; }
+			; }
+			;
+			; so, the idea here is to use the fact that GetGammaCoded returns (offset/256)+2,
+			; and to split the first condition by noticing that C-1 can never be zero
+			dec c : jp @LWM0_AfterLWM
+
+;
+;  the re-use of the previous offset (LWM magic)
+
+@LWM1_KickInLWM:		; "and a new gamma code for length"
+			call @GetGammaCoded			; BC = len
+			push hl
+			exa : ld a,e : sub iyl : ld l,a
+			ld a,d : sbc iyh			; A:L = DE - IY = dest - saved offset
+			jp @LWM0_CopyMatchLDH
+
+;==================================================================================================================
+;==================================================================================================================
+;==================================================================================================================
+
+;
+;  interlaced gamma code reader
+;  x0 -> 1x
+;  x1y0 -> 1xy
+;  x1y1z0 -> 1xyz etc
+;  (technically, this is a 2-based variation of Exp-Golomb-1)
+
+@GetGammaCoded:		ld bc,1
+@ReadGamma		add a : jr z,@ReloadByteRG1
+			rl c : rl b
+			add a : jr z,@ReloadByteRG2
+			jr c,@ReadGamma : ret
+
+@ReloadByteRG1		ld a,(hl) : inc hl : rla
+			rl c : rl b
+			add a : jr c,@ReadGamma : ret
+
+@ReloadByteRG2		ld a,(hl) : inc hl : rla
+			jr c,@ReadGamma : ret
+
+;
+;  pretty usual getbit for mixed datastreams
+
+@ReloadByte:		ld a,(hl) : inc hl : rla : ret
+
+MEND
+
diff --git a/tools/rasm/decrunch/unlzsa1_fast.asm b/tools/rasm/decrunch/unlzsa1_fast.asm
new file mode 100644
index 0000000..0e2617f
--- /dev/null
+++ b/tools/rasm/decrunch/unlzsa1_fast.asm
@@ -0,0 +1,204 @@
+;
+;  Speed-optimized LZSA1 decompressor by spke & uniabis (109 bytes)
+;
+;  ver.00 by spke for LZSA 0.5.4 (03-24/04/2019, 134 bytes);
+;  ver.01 by spke for LZSA 0.5.6 (25/04/2019, 110(-24) bytes, +0.2% speed);
+;  ver.02 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
+;  ver.03 by uniabis (30/07/2019, 109(-1) bytes, +3.5% speed);
+;  ver.04 by spke (31/07/2019, small re-organization of macros);
+;  ver.05 by uniabis (22/08/2019, 107(-2) bytes, same speed);
+;  ver.06 by spke for LZSA 1.0.7 (27/08/2019, 111(+4) bytes, +2.1% speed);
+;  ver.07 by spke for LZSA 1.1.0 (25/09/2019, added full revision history);
+;  ver.08 by spke for LZSA 1.1.2 (22/10/2019, re-organized macros and added an option for unrolled copying of long matches);
+;  ver.09 by spke for LZSA 1.2.1 (02/01/2020, 109(-2) bytes, same speed)
+;
+;  The data must be compressed using the command line compressor by Emmanuel Marty
+;  The compression is done as follows:
+;
+;  lzsa.exe -f1 -r <sourcefile> <outfile>
+;
+;  where option -r asks for the generation of raw (frame-less) data.
+;
+;  The decompression is done in the standard way:
+;
+;  ld hl,FirstByteOfCompressedData
+;  ld de,FirstByteOfMemoryForDecompressedData
+;  call DecompressLZSA1
+;
+;  Backward compression is also supported; you can compress files backward using:
+;
+;  lzsa.exe -f1 -r -b <sourcefile> <outfile>
+;
+;  and decompress the resulting files using:
+;
+;  ld hl,LastByteOfCompressedData
+;  ld de,LastByteOfMemoryForDecompressedData
+;  call DecompressLZSA1
+;
+;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
+;
+;  Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
+;  see https://github.com/emmanuel-marty/lzsa for more information
+;
+;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;
+;  This software is provided 'as-is', without any express or implied
+;  warranty.  In no event will the authors be held liable for any damages
+;  arising from the use of this software.
+;
+;  Permission is granted to anyone to use this software for any purpose,
+;  including commercial applications, and to alter it and redistribute it
+;  freely, subject to the following restrictions:
+;
+;  1. The origin of this software must not be misrepresented; you must not
+;     claim that you wrote the original software. If you use this software
+;     in a product, an acknowledgment in the product documentation would be
+;     appreciated but is not required.
+;  2. Altered source versions must be plainly marked as such, and must not be
+;     misrepresented as being the original software.
+;  3. This notice may not be removed or altered from any source distribution.
+
+;	DEFINE	UNROLL_LONG_MATCHES						; uncomment for faster decompression of very compressible data (+57 bytes)
+;	DEFINE	BACKWARD_DECOMPRESS
+
+	IFNDEF	BACKWARD_DECOMPRESS
+; forward decompression: pointers ascend
+		MACRO NEXT_HL
+		inc hl
+		ENDM
+
+		MACRO ADD_OFFSET
+		ex de,hl : add hl,de						; swap HL and DE, then HL = sum of the two original values
+		ENDM
+
+		MACRO COPY1
+		ldi
+		ENDM
+
+		MACRO COPYBC
+		ldir
+		ENDM
+
+	ELSE
+; backward decompression: pointers descend
+		MACRO NEXT_HL
+		dec hl
+		ENDM
+
+		MACRO ADD_OFFSET
+		ex de,hl : ld a,e : sub l : ld l,a				; swap HL and DE, then HL = DE - HL, low byte first
+		ld a,d : sbc h : ld h,a						; 4*4+3*4 = 28t / 7 bytes
+		ENDM
+
+		MACRO COPY1
+		ldd
+		ENDM
+
+		MACRO COPYBC
+		lddr
+		ENDM
+
+	ENDIF
+
+macro DecompressLZSA1
+		ld b,0 : jr @ReadToken						; B = 0 so that BC = C for the short copy lengths
+
+@NoLiterals:	xor (hl) : NEXT_HL : jp m,@LongOffset			; A is 0 here, so XOR (HL) loads the token; bit 7 set = two-byte offset
+
+@ShortOffset:	push de : ld e,(hl) : ld d,#FF				; save dest; DE = one-byte offset with the high byte forced to #FF
+
+ 		; short matches have length 0+3..14+3
+		add 3 : cp 15+3 : jr nc,@LongerMatch
+
+		; placed here this saves a JP per iteration
+@CopyMatch:	ld c,a
+.UseC		NEXT_HL : ex (sp),hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
+		ADD_OFFSET							; BC = len, DE = dest, HL = dest-offset, SP->[src]
+		COPY1 : COPY1 : COPYBC						; BC = 0, DE = dest
+.popSrc		pop hl								; HL = src
+	
+@ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,
+		; where LLL is the number of literals and MMMM is
+		; a length of the match that follows after the literals
+		ld a,(hl) : and #70 : jr z,@NoLiterals
+
+		cp #70 : jr z,@MoreLiterals					; LLL=7 means 7+ literals...
+		rrca : rrca : rrca : rrca : ld c,a				; LLL<7 means 0..6 literals...
+
+		ld a,(hl) : NEXT_HL
+		COPYBC
+
+		; the top bit of token is set if the offset contains two bytes
+		and #8F : jp p,@ShortOffset
+
+@LongOffset:	; read second byte of the offset
+		push de : ld e,(hl) : NEXT_HL : ld d,(hl)
+		add -128+3 : cp 15+3 : jp c,@CopyMatch
+
+	IFNDEF	UNROLL_LONG_MATCHES
+
+		; MMMM=15 indicates a multi-byte match length
+@LongerMatch:	NEXT_HL : add (hl) : jr nc,@CopyMatch
+
+		; the codes are designed to overflow;
+		; the overflow value 1 means read 1 extra byte
+		; and overflow value 0 means read 2 extra bytes
+.code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,@CopyMatch.UseC
+.code0		NEXT_HL : ld b,(hl)
+
+		; the two-byte match length equal to zero
+		; designates the end-of-data marker
+		ld a,b : or c : jr nz,@CopyMatch.UseC
+		pop de : ret							; end of data: restore DE = dest saved by PUSH DE and leave
+
+	ELSE
+
+		; MMMM=15 indicates a multi-byte match length
+@LongerMatch:	NEXT_HL : add (hl) : jr c,@VeryLongMatch
+
+		ld c,a
+.UseC		NEXT_HL : ex (sp),hl
+		ADD_OFFSET
+		COPY1 : COPY1
+
+		; this is an unrolled equivalent of LDIR
+		xor a : sub c
+		and 32-1 : add a
+		ld (.jrOffset),a : jr nz,$+2					; self-modifying: patches the displacement byte of this JR
+.jrOffset	EQU $-1
+.fastLDIR	repeat 32
+		COPY1
+		rend
+		jp pe,.fastLDIR
+		jp @CopyMatch.popSrc
+
+@VeryLongMatch:	; the codes are designed to overflow;
+		; the overflow value 1 means read 1 extra byte
+		; and overflow value 0 means read 2 extra bytes
+.code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,@LongerMatch.UseC
+.code0		NEXT_HL : ld b,(hl)
+
+		; the two-byte match length equal to zero
+		; designates the end-of-data marker
+		ld a,b : or c : jr nz,@LongerMatch.UseC
+		pop de : ret							; end of data: restore DE = dest saved by PUSH DE and leave
+
+	ENDIF
+
+@MoreLiterals:	; there are three possible situations here
+		xor (hl) : NEXT_HL : exa					; park the remaining token bits (O and MMMM) and their flags in AF'
+		ld a,7 : add (hl) : jr c,@ManyLiterals
+
+@CopyLiterals:	ld c,a
+.UseC		NEXT_HL : COPYBC
+
+		exa : jp p,@ShortOffset : jr @LongOffset
+
+@ManyLiterals:
+.code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,@CopyLiterals.UseC
+.code0		NEXT_HL : ld b,(hl) : jr @CopyLiterals.UseC
+
+mend
+
+
+
diff --git a/tools/rasm/decrunch/unlzsa2_fast.asm b/tools/rasm/decrunch/unlzsa2_fast.asm
new file mode 100755
index 0000000..8c6b5b1
--- /dev/null
+++ b/tools/rasm/decrunch/unlzsa2_fast.asm
@@ -0,0 +1,189 @@
+;
+;  Speed-optimized LZSA2 decompressor by spke & uniabis (216 bytes)
+;
+
+	DEFINE	UNROLL_LONG_MATCHES						; uncomment for faster decompression of very compressible data (+38 bytes)
+;	DEFINE	BACKWARD_DECOMPRESS						; uncomment for data compressed with option -b
+
+	IFNDEF	BACKWARD_DECOMPRESS
+; forward decompression: pointers ascend
+		MACRO NEXT_HL
+		inc hl
+		ENDM
+
+		MACRO ADD_OFFSET
+		ex de,hl : add hl,de						; swap HL and DE, then HL = sum of the two original values
+		ENDM
+
+		MACRO COPY1
+		ldi
+		ENDM
+
+		MACRO COPYBC
+		ldir
+		ENDM
+
+	ELSE
+; backward decompression: pointers descend
+		MACRO NEXT_HL
+		dec hl
+		ENDM
+
+		MACRO ADD_OFFSET
+		ex de,hl : ld a,e : sub l : ld l,a				; swap HL and DE, then HL = DE - HL, low byte first
+		ld a,d : sbc h : ld h,a						; 4*4+3*4 = 28t / 7 bytes
+		ENDM
+
+		MACRO COPY1
+		ldd
+		ENDM
+
+		MACRO COPYBC
+		lddr
+		ENDM
+
+	ENDIF
+
+
+macro DecompressLZSA2
+@lzsa2
+		; A' stores next nibble as %1111.... or assumed to contain trash
+		; B is assumed to be 0
+		ld b,0 : scf : exa : jr .ReadToken			; carry set in F' marks the nibble buffer as empty
+
+.ManyLiterals:	ld a,18 : add (hl) : NEXT_HL : jr nc,.CopyLiterals
+		ld c,(hl) : NEXT_HL
+		ld a,b : ld b,(hl)
+		jr .NEXTHLuseBC
+
+
+.MoreLiterals:	ld b,(hl) : NEXT_HL					; B temporarily keeps the token while the literal count is extended
+		scf : exa : jr nc,.noUpdatemoar				; NC in F' = a nibble is still buffered in A'
+
+			ld a,(hl) : or #F0 : exa			; buffer the low nibble in A'; OR leaves carry clear in F'
+			ld a,(hl) : NEXT_HL : or #0F
+			rrca : rrca : rrca : rrca			; A = %1111hhhh, the high nibble of the byte just read
+
+.noUpdatemoar	;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
+		inc a : jr z,.ManyLiterals : sub #F0-3+1
+
+.CopyLiterals:	ld c,a : ld a,b : ld b,0				; BC = literal count, A = token again
+		COPYBC
+		push de : or a : jp p,.CASE0xx ;: jr CASE1xx
+
+		cp %11000000 : jr c,.CASE10x
+
+.CASE11x		cp %11100000 : jr c,.CASE110
+
+		; "111": repeated offset
+.CASE111:	ld de,ix : jr .MatchLen
+
+
+.Literals0011:	jr nz,.MoreLiterals
+
+		; if "LL" of the byte token is equal to 0,
+		; there are no literals to copy
+.NoLiterals:	or (hl) : NEXT_HL
+		push de : jp m,.CASE1xx
+
+		; short (5 or 9 bit long) offsets
+.CASE0xx		ld d,#FF : cp %01000000 : jr c,.CASE00x
+
+		; "01x": the case of the 9-bit offset
+.CASE01x:	cp %01100000 : rl d
+
+.ReadOffsetE	ld e,(hl) : NEXT_HL
+
+.SaveOffset:	LD ix,de						; remember the offset for the "111" repeated-offset case
+
+.MatchLen:	inc a : and %00000111 : jr z,.LongerMatch : inc a
+
+.CopyMatch:	ld c,a
+;.useC
+		ex (sp),hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
+		ADD_OFFSET						; BC = len, DE = dest, HL = dest-offset, SP->[src]
+		COPY1
+		COPYBC
+.popSrc		pop hl
+
+		; compressed data stream contains records
+		; each record begins with the byte token "XYZ|LL|MMM"
+.ReadToken:	ld a,(hl) : and %00011000 : jp pe,.Literals0011		; process the cases 00 and 11 separately
+
+		rrca : rrca : rrca
+
+		ld c,a : ld a,(hl)					; token is re-read for further processing
+.NEXTHLuseBC	NEXT_HL
+		COPYBC
+
+		; the token and literals are followed by the offset
+		push de : or a : jp p,.CASE0xx
+
+.CASE1xx		cp %11000000 : jr nc,.CASE11x
+
+		; "10x": the case of the 13-bit offset
+.CASE10x:	ld c,a : exa : jr nc,.noUpdatecase10x			; carry is set on entry (A < %11000000), so AF' is left marked empty
+
+			ld a,(hl) : or #F0 : exa
+			ld a,(hl) : NEXT_HL : or #0F
+			rrca : rrca : rrca : rrca
+
+.noUpdatecase10x	ld d,a : ld a,c
+		cp %10100000 : dec d : rl d : jr .ReadOffsetE
+
+
+		
+		; "110": 16-bit offset
+.CASE110:	ld d,(hl) : NEXT_HL : jr .ReadOffsetE
+
+
+
+
+		; "00x": the case of the 5-bit offset
+.CASE00x:	ld c,a : exa : jr nc,.noUpdatecase00x			; carry is set on entry (A < %01000000), so AF' is left marked empty
+
+			ld a,(hl) : or #F0 : exa
+			ld a,(hl) : NEXT_HL : or #0F
+			rrca : rrca : rrca : rrca
+
+.noUpdatecase00x	ld e,a : ld a,c
+		cp %00100000 : rl e : jp .SaveOffset
+
+
+.LongerMatch:	scf : exa : jr nc,.noUpdatelongermatch
+
+			ld a,(hl) : or #F0 : exa
+			ld a,(hl) : NEXT_HL : or #0F
+			rrca : rrca : rrca : rrca
+
+.noUpdatelongermatch	sub #F0-9 : cp 15+9 : jr c,.CopyMatch
+
+
+.LongMatch:	add (hl) : NEXT_HL : jr c,.VeryLongMatch
+
+		ld c,a
+.useC		ex (sp),hl
+		ADD_OFFSET
+		COPY1
+
+		; this is an unrolled equivalent of LDIR
+		xor a : sub c
+		and 32-1 : add a
+		ld (.jrOffset),a : jr nz,$+2				; self-modifying: patches the displacement byte of this JR
+.jrOffset	EQU $-1
+.fastLDIR	repeat 32
+		COPY1
+		rend
+		jp pe,.fastLDIR
+		jp .popSrc
+
+.VeryLongMatch:	ld c,(hl) : NEXT_HL
+		ld b,(hl) : NEXT_HL : jr nz,.useC
+		pop de : ret						; end of data: restore DE = dest saved by PUSH DE and leave
+
+mend
+
+
+
+
+
-- 
cgit v1.2.3