aboutsummaryrefslogtreecommitdiff
path: root/tools/rasm/decrunch
diff options
context:
space:
mode:
authorJuan J. Martinez <jjm@usebox.net>2022-09-06 07:37:20 +0100
committerJuan J. Martinez <jjm@usebox.net>2022-09-06 07:37:20 +0100
commit30bf0f51335e87812ffeb54e9437f0b6a1514d67 (patch)
tree9c85a2de53b4da69fcfaa84488cc6c12ebd3e5d0 /tools/rasm/decrunch
parentd8990284057e6401d0374f439df51879595d804d (diff)
downloadubox-msx-lib-30bf0f51335e87812ffeb54e9437f0b6a1514d67.tar.gz
ubox-msx-lib-30bf0f51335e87812ffeb54e9437f0b6a1514d67.zip
Updated rasm to 1.7
Diffstat (limited to 'tools/rasm/decrunch')
-rw-r--r--tools/rasm/decrunch/dzx0_fast.asm237
-rw-r--r--tools/rasm/decrunch/dzx0_standard.asm64
-rw-r--r--tools/rasm/decrunch/dzx0_standard_back.asm65
-rw-r--r--tools/rasm/decrunch/dzx0_turbo_back.asm101
-rw-r--r--tools/rasm/decrunch/dzx7_turbo.asm4
-rw-r--r--tools/rasm/decrunch/exomizer3megachur.asm210
-rw-r--r--tools/rasm/decrunch/lz48decrunch_v006.asm113
-rw-r--r--tools/rasm/decrunch/lz48decrunch_v006b.asm78
-rw-r--r--tools/rasm/decrunch/unaplib.asm (renamed from tools/rasm/decrunch/aplib_z80_todo.asm)8
-rw-r--r--tools/rasm/decrunch/unaplib_fast.asm266
-rw-r--r--tools/rasm/decrunch/unlzsa1_fast.asm204
-rwxr-xr-xtools/rasm/decrunch/unlzsa2_fast.asm189
12 files changed, 1210 insertions, 329 deletions
diff --git a/tools/rasm/decrunch/dzx0_fast.asm b/tools/rasm/decrunch/dzx0_fast.asm
new file mode 100644
index 0000000..55f4388
--- /dev/null
+++ b/tools/rasm/decrunch/dzx0_fast.asm
@@ -0,0 +1,237 @@
+;
+; Speed-optimized ZX0 decompressor by spke (187 bytes)
+;
+; ver.00 by spke (27/01-23/03/2021, 191 bytes)
+; ver.01 by spke (24/03/2021, 193(+2) bytes - fixed a bug in the initialization)
+; ver.01patch2 by uniabis (25/03/2021, 191(-2) bytes - fixed a bug with elias over 8bits)
+; ver.01patch9 by uniabis (10/09/2021, 187(-4) bytes - support for new v2 format)
+;
+; Original ZX0 decompressors were written by Einar Saukas
+;
+; This decompressor was written on the basis of "Standard" decompressor by
+; Einar Saukas and optimized for speed by spke. This decompressor is
+; about 5% faster than the "Turbo" decompressor, which is 128 bytes long.
+; It has about the same speed as the 412 bytes version of the "Mega" decompressor.
+;
+; The decompressor uses AF, BC, DE, HL and IX and relies upon self-modifying code.
+;
+; The decompression is done in the standard way:
+;
+; ld hl,FirstByteOfCompressedData
+; ld de,FirstByteOfMemoryForDecompressedData
+; call DecompressZX0 ; NB: in this file DecompressZX0 is a rasm macro - expand it under a label and call that label
+;
+; Of course, ZX0 compression algorithms are (c) 2021 Einar Saukas,
+; see https://github.com/einar-saukas/ZX0 for more information
+;
+; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must not
+; claim that you wrote the original software. If you use this software
+; in a product, an acknowledgment in the product documentation would be
+; appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must not be
+; misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source distribution.
+
+macro DecompressZX0
+; Body of the ZX0 unpacker: HL = packed source, DE = destination, A = bit buffer; it leaves through the "ret z" at the end-of-data marker.
+ ld ix, @CopyMatch1 ; IX = address pushed as a fake return address by "push ix" near the end of the macro
+ ld bc, $ffff
+ ld (@PrevOffset+1), bc ; default offset is -1
+ inc bc ; BC = 0
+ ld a, $80 ; bit buffer holds only the sentinel bit, so the first read forces a reload
+ jr @RunOfLiterals ; BC is assumed to contain 0 most of the time
+
+@ShorterOffsets:
+ ld b, $ff ; the top byte of the offset is always $FF
+ ld c, (hl)
+ inc hl
+ rr c ; carry (set on every path into here) -> bit 7, bit 0 (first length bit) -> carry
+ ld (@PrevOffset+1), bc ; patch the operand of "ld hl, nn" at @PrevOffset
+ jr nc, @LongerMatch
+
+@CopyMatch2: ; the case of matches with len=2
+ ld bc, 2
+
+ ; the faster match copying code
+@CopyMatch1:
+ push hl ; preserve source
+
+@PrevOffset:
+ ld hl, $ffff ; restore offset (default offset is -1); operand is rewritten at run time
+ add hl, de ; HL = dest - offset
+ ldir
+ pop hl ; restore source
+
+ ; after a match you can have either
+ ; 0 + <elias length> = run of literals, or
+ ; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+@AfterMatch1:
+ add a, a
+ jr nc, @RunOfLiterals
+
+@UsualMatch: ; this is the case of usual match+offset
+ add a, a
+ jr nc, @LongerOffets
+ jr nz, @ShorterOffsets ; carry set and A != 0: the bit was real data, not the sentinel
+
+ ld a, (hl) ; reload bits
+ inc hl
+ rla ; sentinel (in carry) -> bit 0, first bit of the new byte -> carry
+
+ jr c, @ShorterOffsets
+
+@LongerOffets:
+ ld c, $fe
+
+ add a, a ; inline read gamma
+ rl c
+ add a, a
+ jr nc, $-4 ; loop back to the first "add a, a" above (1+2+1 bytes), assuming $ is the address of this JR
+
+ call z, @ReloadReadGamma ; Z: the carry was the sentinel, continue the code from a fresh byte
+
+@ProcessOffset:
+
+ inc c
+ ret z ; end-of-data marker (only checked for longer offsets)
+ rr c
+ ld b, c
+ ld c, (hl)
+ inc hl
+ rr c
+ ld (@PrevOffset+1), bc ; patch the operand of "ld hl, nn" at @PrevOffset
+
+ ; lowest bit is the first bit of the gamma code for length
+ jr c, @CopyMatch2
+
+@LongerMatch:
+ ld bc, 1
+
+ add a, a ; inline read gamma
+ rl c
+ add a, a
+ jr nc, $-4 ; loop back to the first "add a, a" above, assuming $ is the address of this JR
+
+ call z,@ReloadReadGamma ; Z: the carry was the sentinel, continue the code from a fresh byte
+
+@CopyMatch3:
+ push hl ; preserve source
+ ld hl, (@PrevOffset+1) ; restore offset
+ add hl, de ; HL = dest - offset
+
+ ; because BC>=3-1, we can do 2 x LDI safely
+ ldi
+ ldir
+ inc c ; LDIR left BC = 0; BC = 1 so the last LDI ends with BC = 0 (BC+1 bytes copied in total)
+ ldi
+ pop hl ; restore source
+
+ ; after a match you can have either
+ ; 0 + <elias length> = run of literals, or
+ ; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+@AfterMatch3:
+ add a, a
+ jr c, @UsualMatch
+
+@RunOfLiterals:
+ inc c ; BC = 1: seed of the gamma value (BC is 0 on entry)
+ add a, a
+ jr nc, @LongerRun
+ jr nz, @CopyLiteral ; carry set and A != 0: the bit was real data, not the sentinel
+
+ ld a, (hl) ; reload bits
+ inc hl
+ rla
+
+ jr c, @CopyLiteral
+
+@LongerRun:
+ add a, a ; inline read gamma
+ rl c
+ add a, a
+ jr nc, $-4 ; loop back to the first "add a, a" above, assuming $ is the address of this JR
+
+ jr nz, @CopyLiterals
+
+ ld a, (hl) ; reload bits
+ inc hl
+ rla
+
+ call nc, @ReadGammaAligned
+
+@CopyLiterals:
+ ldi
+
+@CopyLiteral:
+ ldir
+
+ ; after a literal run you can have either
+ ; 0 + <elias length> = match using a repeated offset, or
+ ; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
+ add a, a
+ jr c, @UsualMatch
+
+@RepMatch:
+ inc c ; BC = 1: seed of the gamma value (LDIR left BC = 0)
+ add a, a
+ jr nc, @LongerRepMatch
+ jr nz, @CopyMatch1 ; carry set and A != 0: the bit was real data, not the sentinel
+
+ ld a, (hl) ; reload bits
+ inc hl
+ rla
+
+ jr c, @CopyMatch1
+
+@LongerRepMatch:
+ add a, a ; inline read gamma
+ rl c
+ add a, a
+ jr nc, $-4 ; loop back to the first "add a, a" above, assuming $ is the address of this JR
+
+ jp nz, @CopyMatch1
+
+ ; this is a crafty equivalent of CALL ReloadReadGamma : JP CopyMatch1
+ push ix ; IX = @CopyMatch1, so the RET of the subroutine below lands there
+
+ ; the subroutine for reading the remainder of the partly read Elias gamma code.
+ ; it has two entry points: ReloadReadGamma first refills the bit reservoir in A,
+ ; while ReadGammaAligned assumes that the bit reservoir has just been refilled.
+@ReloadReadGamma:
+ ld a, (hl) ; reload bits
+ inc hl
+ rla
+
+ ret c ; a 1 flag bit ends the code
+@ReadGammaAligned:
+ add a, a
+ rl c
+ add a, a
+ ret c
+ add a, a
+ rl c
+ add a, a
+@ReadingLongGamma: ; this loop does not need unrolling, as it does not get much use anyway
+ ret c
+ add a, a
+ rl c
+ rl b ; from here on the value may exceed 8 bits, so B is shifted as well
+ add a, a
+ jr nz, @ReadingLongGamma
+
+ ld a, (hl) ; reload bits
+ inc hl
+ rla
+ jr @ReadingLongGamma
+mend
+
diff --git a/tools/rasm/decrunch/dzx0_standard.asm b/tools/rasm/decrunch/dzx0_standard.asm
new file mode 100644
index 0000000..6525c8b
--- /dev/null
+++ b/tools/rasm/decrunch/dzx0_standard.asm
@@ -0,0 +1,64 @@
+; -----------------------------------------------------------------------------
+; ZX0 decoder by Einar Saukas & Urusergi
+; "Standard" version (68 bytes only)
+; -----------------------------------------------------------------------------
+; Parameters:
+; HL: source address (compressed data)
+; DE: destination address (decompressing)
+; -----------------------------------------------------------------------------
+
+macro dzx0_standard
+ ld bc, $ffff ; preserve default offset 1
+ push bc ; the last offset is kept on the stack, in negated form ($FFFF = -1)
+ inc bc ; BC = 0
+ ld a, $80 ; bit buffer holds only the sentinel bit, so the first read forces a reload
+@dzx0s_literals:
+ call @dzx0s_elias ; obtain length
+ ldir ; copy literals
+ add a, a ; copy from last offset or new offset?
+ jr c, @dzx0s_new_offset
+ call @dzx0s_elias ; obtain length
+@dzx0s_copy:
+ ex (sp), hl ; preserve source, restore offset
+ push hl ; preserve offset
+ add hl, de ; calculate destination - offset
+ ldir ; copy from offset
+ pop hl ; restore offset
+ ex (sp), hl ; preserve offset, restore source
+ add a, a ; copy from literals or new offset?
+ jr nc, @dzx0s_literals
+@dzx0s_new_offset:
+ pop bc ; discard last offset
+ ld c, $fe ; prepare negative offset
+ call @dzx0s_elias_loop ; obtain offset MSB
+ inc c ; C becomes 0 only if the gamma code left $FF in C
+ ret z ; check end marker
+ ld b, c ; B = offset MSB
+ ld c, (hl) ; obtain offset LSB
+ inc hl
+ rr b ; last offset bit becomes first length bit
+ rr c
+ push bc ; preserve new offset
+ ld bc, 1 ; obtain length
+ call nc, @dzx0s_elias_backtrack
+ inc bc ; copy length = gamma value + 1
+ jr @dzx0s_copy
+@dzx0s_elias:
+ inc c ; interlaced Elias gamma coding
+@dzx0s_elias_loop:
+ add a, a ; next flag bit -> carry; Z means only the sentinel was left
+ jr nz, @dzx0s_elias_skip
+ ld a, (hl) ; load another group of 8 bits
+ inc hl
+ rla ; sentinel (in carry) -> bit 0, first bit of the new byte -> carry
+@dzx0s_elias_skip:
+ ret c ; a 1 flag bit ends the code
+@dzx0s_elias_backtrack:
+ add a, a ; next data bit -> carry
+ rl c ; shift it into BC
+ rl b
+ jr @dzx0s_elias_loop
+mend
+; -----------------------------------------------------------------------------
+
+
diff --git a/tools/rasm/decrunch/dzx0_standard_back.asm b/tools/rasm/decrunch/dzx0_standard_back.asm
new file mode 100644
index 0000000..3da94bd
--- /dev/null
+++ b/tools/rasm/decrunch/dzx0_standard_back.asm
@@ -0,0 +1,65 @@
+; -----------------------------------------------------------------------------
+; ZX0 decoder by Einar Saukas
+; "Standard" version (69 bytes only) - BACKWARDS VARIANT
+; -----------------------------------------------------------------------------
+; Parameters:
+; HL: last source address (compressed data)
+; DE: last destination address (decompressing)
+; -----------------------------------------------------------------------------
+
+Macro dzx0_standard_back
+ ld bc, 1 ; preserve default offset 1
+ push bc ; the last offset is kept on the stack (positive in this backwards variant)
+ ld a, $80 ; bit buffer holds only the sentinel bit, so the first read forces a reload
+@dzx0sb_literals:
+ call @dzx0sb_elias ; obtain length
+ lddr ; copy literals
+ inc c ; LDDR left BC = 0; BC = 1 is the seed for the next gamma value
+ add a, a ; copy from last offset or new offset?
+ jr c, @dzx0sb_new_offset
+ call @dzx0sb_elias ; obtain length
+@dzx0sb_copy:
+ ex (sp), hl ; preserve source, restore offset
+ push hl ; preserve offset
+ add hl, de ; calculate destination - offset
+ lddr ; copy from offset
+ inc c ; LDDR left BC = 0; BC = 1 is the seed for the next gamma value
+ pop hl ; restore offset
+ ex (sp), hl ; preserve offset, restore source
+ add a, a ; copy from literals or new offset?
+ jr nc, @dzx0sb_literals
+@dzx0sb_new_offset:
+ inc sp ; discard last offset
+ inc sp
+ call @dzx0sb_elias ; obtain offset MSB
+ dec b ; Z only if B was 1, i.e. the gamma value ran past 8 bits
+ ret z ; check end marker
+ dec c ; adjust for positive offset
+ ld b, c
+ ld c, (hl) ; obtain offset LSB
+ dec hl
+ srl b ; last offset bit becomes first length bit
+ rr c
+ inc bc
+ push bc ; preserve new offset
+ ld bc, 1 ; obtain length
+ call c, @dzx0sb_elias_backtrack
+ inc bc ; copy length = gamma value + 1
+ jr @dzx0sb_copy
+@dzx0sb_elias_backtrack:
+ add a, a ; next data bit -> carry
+ rl c ; shift it into BC
+ rl b
+@dzx0sb_elias:
+ add a, a ; inverted interlaced Elias gamma coding
+ jr nz, @dzx0sb_elias_skip
+ ld a, (hl) ; load another group of 8 bits
+ dec hl
+ rla ; sentinel (in carry) -> bit 0, first bit of the new byte -> carry
+@dzx0sb_elias_skip:
+ jr c, @dzx0sb_elias_backtrack ; inverted: a 1 flag bit means another data bit follows
+ ret ; a 0 flag bit ends the code
+mend
+; -----------------------------------------------------------------------------
+
+
diff --git a/tools/rasm/decrunch/dzx0_turbo_back.asm b/tools/rasm/decrunch/dzx0_turbo_back.asm
new file mode 100644
index 0000000..d009e92
--- /dev/null
+++ b/tools/rasm/decrunch/dzx0_turbo_back.asm
@@ -0,0 +1,101 @@
+; -----------------------------------------------------------------------------
+; ZX0 decoder by Einar Saukas & introspec
+; "Turbo" version (126 bytes, 21% faster) - BACKWARDS VARIANT
+; -----------------------------------------------------------------------------
+; Parameters:
+; HL: last source address (compressed data)
+; DE: last destination address (decompressing)
+; -----------------------------------------------------------------------------
+
+macro dzx0_turbo_back
+ ld bc, 1 ; preserve default offset 1
+ ld (@dzx0tb_last_offset+1), bc ; self-modifying: patches the operand of "ld hl, nn" below
+ ld a, $80 ; bit buffer holds only the sentinel bit, so the first read forces a reload
+ jr @dzx0tb_literals
+@dzx0tb_new_offset:
+ add a, a ; obtain offset MSB
+ call c, @dzx0tb_elias
+ dec b ; Z only if B was 1, i.e. the gamma value ran past 8 bits
+ ret z ; check end marker
+ dec c ; adjust for positive offset
+ ld b, c
+ ld c, (hl) ; obtain offset LSB
+ dec hl
+ srl b ; last offset bit becomes first length bit
+ rr c
+ inc bc
+ ld (@dzx0tb_last_offset+1), bc ; preserve new offset
+ ld bc, 1 ; obtain length
+ call c, @dzx0tb_elias_loop
+ inc bc ; copy length = gamma value + 1
+@dzx0tb_copy:
+ push hl ; preserve source
+@dzx0tb_last_offset:
+ ld hl, 0 ; restore offset (operand is rewritten at run time)
+ add hl, de ; calculate destination - offset
+ lddr ; copy from offset
+ inc c ; LDDR left BC = 0; BC = 1 is the seed for the next gamma value
+ pop hl ; restore source
+ add a, a ; copy from literals or new offset?
+ jr c, @dzx0tb_new_offset
+@dzx0tb_literals:
+ add a, a ; obtain length
+ call c, @dzx0tb_elias
+ lddr ; copy literals
+ inc c ; LDDR left BC = 0; BC = 1 is the seed for the next gamma value
+ add a, a ; copy from last offset or new offset?
+ jr c, @dzx0tb_new_offset
+ add a, a ; obtain length
+ call c, @dzx0tb_elias
+ jp @dzx0tb_copy
+@dzx0tb_elias_loop:
+ add a, a ; next data bit -> carry
+ rl c ; shift it into C
+ add a, a ; next flag bit -> carry
+ ret nc ; inverted: a 0 flag bit ends the code
+@dzx0tb_elias:
+ jp nz, @dzx0tb_elias_loop ; inverted interlaced Elias gamma coding
+ ld a, (hl) ; load another group of 8 bits
+ dec hl
+ rla ; sentinel (in carry) -> bit 0, first bit of the new byte -> carry
+ ret nc
+ add a, a
+ rl c
+ add a, a
+ ret nc
+ add a, a
+ rl c
+ add a, a
+ ret nc
+ add a, a
+ rl c
+ add a, a
+ ret nc
+@dzx0tb_elias_reload:
+ add a, a ; eighth (last) bit of the current byte: a data bit
+ rl c
+ rl b ; from here on the value may exceed 8 bits, so B is shifted as well
+ add a, a ; shifts out the sentinel: the byte is known to be used up here, hence the unconditional reload
+ ld a, (hl) ; load another group of 8 bits
+ dec hl
+ rla
+ ret nc
+ add a, a
+ rl c
+ rl b
+ add a, a
+ ret nc
+ add a, a
+ rl c
+ rl b
+ add a, a
+ ret nc
+ add a, a
+ rl c
+ rl b
+ add a, a
+ jr c, @dzx0tb_elias_reload
+ ret
+; -----------------------------------------------------------------------------
+mend
+
diff --git a/tools/rasm/decrunch/dzx7_turbo.asm b/tools/rasm/decrunch/dzx7_turbo.asm
index 779ced5..cb66be5 100644
--- a/tools/rasm/decrunch/dzx7_turbo.asm
+++ b/tools/rasm/decrunch/dzx7_turbo.asm
@@ -8,7 +8,7 @@
; -----------------------------------------------------------------------------
dzx7_turbo:
- ld a, $80
+ ld a, #80
dzx7t_copy_byte_loop:
ldi ; copy literal byte
dzx7t_main_loop:
@@ -42,7 +42,7 @@ dzx7t_len_value_start:
; determine offset
ld e, (hl) ; load offset flag (1 bit) + offset value (7 bits)
inc hl
- defb $cb, $33 ; opcode for undocumented instruction "SLL E" aka "SLS E"
+ defb #cb, #33 ; opcode for undocumented instruction "SLL E" aka "SLS E"
jr nc, dzx7t_offset_end ; if offset flag is set, load 4 extra bits
add a, a ; check next bit
call z, dzx7t_load_bits ; no more bits left?
diff --git a/tools/rasm/decrunch/exomizer3megachur.asm b/tools/rasm/decrunch/exomizer3megachur.asm
deleted file mode 100644
index ea1973e..0000000
--- a/tools/rasm/decrunch/exomizer3megachur.asm
+++ /dev/null
@@ -1,210 +0,0 @@
-;Exomizer 2 Z80 decoder
-;Copyright (C) 2008-2016 by Jaime Tejedor Gomez (Metalbrain)
-;
-;Optimized by Antonio Villena and Urusergi (169 bytes)
-;
-;Compression algorithm by Magnus Lind
-;
-; This depacker is free software; you can redistribute it and/or
-; modify it under the terms of the GNU Lesser General Public
-; License as published by the Free Software Foundation; either
-; version 2.1 of the License, or (at your option) any later version.
-;
-; This library is distributed in the hope that it will be useful,
-; but WITHOUT ANY WARRANTY; without even the implied warranty of
-; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-; Lesser General Public License for more details.
-;
-; You should have received a copy of the GNU Lesser General Public
-; License along with this library; if not, write to the Free Software
-; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;
-;
-;input- hl=compressed data start
-; de=uncompressed destination start
-;
-; you may change exo_mapbasebits to point to any free buffer
-;
-;ATTENTION!
-;A huge speed boost (around 14%) can be gained at the cost of only 5 bytes.
-;If you want this, replace all instances of "call exo_getbit" with "srl a" followed by
-;"call z,exo_getbit", and remove the first two instructions in exo_getbit routine.
-; ---------------------------
-; modified by Megachur in 2018
-; ---------------------------
-; hl -> compressed data start
-; de -> uncompressed destination start
-; ---------------------------
-
-;EXO_BACKWARD equ 1
-ENABLE_MEXO_GETBIT equ 1
-
-list:EXOMIZER_ADDRESS:nolist
-; ---------------------------
-MACRO MEXO_GETBIT
- srl a
- jr nz,@1
- ld a,(hl)
- IFDEF EXO_BACKWARD
- dec hl
- ELSE
- inc hl
- ENDIF
- rra
-@1
-ENDM
-
-deexo:
- ld iy,exo_mapbasebits+11
- ld a,(hl)
-
- IFDEF EXO_BACKWARD
- dec hl
- ELSE
- inc hl
- ENDIF
-
- ld b,52
- push de
- cp a
-
-exo_initbits:
- ld c,16
- jr nz,exo_get4bits
- ld ixl,c
- ld de,1 ;DE=b2
-
-exo_get4bits:
- IFDEF ENABLE_MEXO_GETBIT
- MEXO_GETBIT
- ELSE
- srl a:call z,exo_getbit ;call exo_getbit ;get one bit
- ENDIF
- rl c
- jr nc,exo_get4bits
- inc c
- push hl
- ld hl,1
- ld (iy+41),c ;bits[i]=b1 (and opcode 41 == add hl,hl)
-
-exo_setbit:
- dec c
- jr nz,exo_setbit-1 ;jump to add hl,hl instruction
- ld (iy-11),e
- ld (iy+93),d ;base[i]=b2
- add hl,de
- ex de,hl
- inc iy
- pop hl
- dec ixl
- djnz exo_initbits
- pop de
- jr exo_mainloop
-
-exo_literalrun:
- ld e,c ;DE=1
-
-exo_getbits:
- dec b
- ret z
-
-exo_getbits1:
- IFDEF ENABLE_MEXO_GETBIT
- MEXO_GETBIT
- ELSE
- srl a:call z,exo_getbit ;call exo_getbit
- ENDIF
- rl e
- rl d
- jr nc,exo_getbits
- ld b,d
- ld c,e
- pop de
-
-exo_literalcopy:
- IFDEF EXO_BACKWARD
- lddr
- ELSE
- ldir
- ENDIF
-exo_mainloop:
- inc c
- IFDEF ENABLE_MEXO_GETBIT
- MEXO_GETBIT
- ELSE
- srl a:call z,exo_getbit ;call exo_getbit ;literal?
- ENDIF
- jr c,exo_literalcopy
- ld c,239
-exo_getindex:
- IFDEF ENABLE_MEXO_GETBIT
- MEXO_GETBIT
- ELSE
- srl a:call z,exo_getbit ;call exo_getbit
- ENDIF
- inc c
- jr nc,exo_getindex
- ret z
- push de
- ld d,b
- jp p,exo_literalrun
- ld iy,exo_mapbasebits-229
- call exo_getpair
- push de
- rlc d
- jr nz,exo_dontgo
- dec e
- ld bc,512+32 ;2 bits,48 offset
- jr z,exo_goforit
- dec e ;2?
-exo_dontgo:
- ld bc,1024+16 ;4 bits,32 offset
- jr z,exo_goforit
- ld de,0
- ld c,d ;16 offset
-exo_goforit:
- call exo_getbits1
- ld iy,exo_mapbasebits+27
- add iy,de
- call exo_getpair
- pop bc
- ex (sp),hl
- IFDEF EXO_BACKWARD
- ex de,hl
- add hl,de
- lddr
- ELSE
- push hl
- sbc hl,de
- pop de
- ldir
- ENDIF
- pop hl
- jr exo_mainloop ;Next!
-exo_getpair:
- add iy,bc
- ld e,d
- ld b,(iy+41)
- call exo_getbits
- ex de,hl
- ld c,(iy-11)
- ld b,(iy+93)
- add hl,bc ;Always clear C flag
- ex de,hl
- ret
-
- IFDEF ENABLE_MEXO_GETBIT
- ELSE
-exo_getbit:
-; srl a
-; ret nz
- ld a,(hl)
- inc hl
- rra
- ret
- ENDIF
-
-exo_mapbasebits:
- ds 156,#00 ;tables for bits,baseL,baseH
-; ---------------------------
-list:EXOMIZER_ADDRESS_LENGTH equ $-EXOMIZER_ADDRESS:nolist \ No newline at end of file
diff --git a/tools/rasm/decrunch/lz48decrunch_v006.asm b/tools/rasm/decrunch/lz48decrunch_v006.asm
deleted file mode 100644
index 750b571..0000000
--- a/tools/rasm/decrunch/lz48decrunch_v006.asm
+++ /dev/null
@@ -1,113 +0,0 @@
-;
-; LZ48 decrunch
-;
-; hl compressed data adress
-; de output adress of data
-;
-
-
-org #8000
-
-; CALL #8000,source,destination
-di
-
-; parameters
-ld h,(ix+3)
-ld l,(ix+2)
-ld d,(ix+1)
-ld e,(ix+0)
-
-call LZ48_decrunch
-
-ei
-ret
-
-
-
-
-
-LZ48_decrunch
-ldi
-ld b,0
-
-nextsequence
-ld a,(hl)
-inc hl
-ld lx,a
-and #F0
-jr z,lzunpack ; no litteral bytes
-rrca
-rrca
-rrca
-rrca
-
-ld c,a
-cp 15 ; more bytes for length?
-jr nz,copyliteral
-
-getadditionallength
-ld a,(hl)
-inc hl
-inc a
-jr nz,lengthnext
-inc b
-dec bc
-jr getadditionallength
-lengthnext
-dec a
-add a,c
-ld c,a
-ld a,b
-adc a,0
-ld b,a ; bc=length
-
-copyliteral
-ldir
-
-lzunpack
-ld a,lx
-and #F
-add 3
-ld c,a
-cp 18 ; more bytes for length?
-jr nz,readoffset
-
-getadditionallengthbis
-ld a,(hl)
-inc hl
-inc a
-jr nz,lengthnextbis
-inc b
-dec bc
-jr getadditionallengthbis
-lengthnextbis
-dec a
-add a,c
-ld c,a
-ld a,b
-adc a,0
-ld b,a ; bc=length
-
-readoffset
-; read encoded offset
-ld a,(hl)
-inc a
-ret z ; LZ48 end with zero offset
-inc hl
-push hl
-ld l,a
-ld a,e
-sub l
-ld l,a
-ld a,d
-sbc a,0
-ld h,a
-; source=dest-copyoffset
-
-copykey
-ldir
-
-pop hl
-jr nextsequence
-
-
diff --git a/tools/rasm/decrunch/lz48decrunch_v006b.asm b/tools/rasm/decrunch/lz48decrunch_v006b.asm
new file mode 100644
index 0000000..69162a4
--- /dev/null
+++ b/tools/rasm/decrunch/lz48decrunch_v006b.asm
@@ -0,0 +1,78 @@
+;
+; LZ48 decrunch
+;
+
+; In ; HL=compressed data address
+; ; DE=output data address
+; Out ; HL last address of compressed data read (you must inc once for LZ48 stream)
+; ; DE last address of decrunched data write +1
+; ; BC always 3
+; ; A always zero
+; ; IXL undetermined
+; ; flags (inc a -> 0)
+; Modif ; AF, BC, DE, HL, IXL
+LZ48_decrunch
+ ldi ; the first byte of the stream is copied as-is
+ ld b,0 ; B = high byte of every length; stays 0 unless a length is extended
+; main loop: one token byte = literal count (high nibble) + match length - 3 (low nibble)
+nextsequence
+ ld a,(hl)
+ inc hl
+ cp #10
+ jr c,lzunpack ; no literal bytes
+ ld ixl,a ; keep the token for its low nibble (match length)
+ and #f0
+ rrca
+ rrca
+ rrca
+ rrca
+; A = literal count taken from the high nibble
+ cp 15 ; more bytes for literal length?
+ jr nz,copyliteral
+getadditionallength
+ ld c,(hl) ; get additional literal length byte
+ inc hl
+ add a,c ; compute literal length total
+ jr nc,lengthNC
+ inc b ; carry out of the low byte goes into the high byte
+lengthNC
+ inc c ; Z only if the extension byte was 255
+ jr z,getadditionallength ; if last literal length byte was 255, we have more bytes to process
+copyliteral
+ ld c,a ; BC = literal length
+ ldir
+ ld a,ixl
+ and #F ; A = low nibble of the token
+lzunpack
+ add 3 ; stored match length is biased by 3
+ cp 18 ; more bytes for match length?
+ jr nz,readoffset
+getadditionallengthbis
+ ld c,(hl) ; get additional match length byte
+ inc hl
+ add a,c ; compute match length size total
+ jr nc,lengthNCbis
+ inc b ; carry out of the low byte goes into the high byte
+lengthNCbis
+ inc c ; Z only if the extension byte was 255
+ jr z,getadditionallengthbis ; if last match length byte was 255, we have more bytes to process
+
+readoffset
+ ld c,a ; BC = match length
+; read encoded offset
+ ld a,(hl)
+ inc a ; A = stored byte + 1; wraps to 0 when the stored byte is 255
+ ret z ; LZ48 end with zero offset
+ inc hl
+ push hl
+; source=dest-copyoffset
+ ; A != 0 here
+ neg
+ ld l,a
+ ld h,#ff ; HL = -offset as a 16-bit value (valid because A was 1..255)
+ add hl,de
+copykey
+ ldir
+; BC = 0 again after LDIR, so B is 0 for the next token
+ pop hl
+ jr nextsequence
diff --git a/tools/rasm/decrunch/aplib_z80_todo.asm b/tools/rasm/decrunch/unaplib.asm
index 6843a14..13c8678 100644
--- a/tools/rasm/decrunch/aplib_z80_todo.asm
+++ b/tools/rasm/decrunch/unaplib.asm
@@ -3,10 +3,10 @@
;hl = source
;de = dest
-ap_bits: .db 0
-ap_byte: .db 0
-lwm: .db 0
-r0: .dw 0
+ap_bits: db 0
+ap_byte: db 0
+lwm: db 0
+r0: dw 0
ap_getbit:
push bc
diff --git a/tools/rasm/decrunch/unaplib_fast.asm b/tools/rasm/decrunch/unaplib_fast.asm
new file mode 100644
index 0000000..47ad16b
--- /dev/null
+++ b/tools/rasm/decrunch/unaplib_fast.asm
@@ -0,0 +1,266 @@
+;
+; Speed-optimized ApLib decompressor by spke (ver.04 spring 2020, 236 bytes)
+;
+; The original Z80 decompressors for ApLib were written by Dan Weiss (Dwedit),
+; then tweaked by Francisco Javier Pena Pareja (utopian),
+; and optimized by Jaime Tejedor Gomez (Metalbrain) and Antonio Villena.
+;
+; This is a new "implicit state" decompressor heavily optimized for speed by spke.
+; (It is 11 bytes shorter and 14% faster than the previously fastest
+; 247b decompressor by Metalbrain and Antonio Villena.)
+;
+; ver.00 by spke (21/08/2018-01/09/2018, 244 bytes, an edit of the existing 247b decompressor);
+; ver.01 by spke (12-13/11/2018, 234(-10) bytes, +3% speed using the state machine for LWM);
+; ver.02 by spke (06/08/2019, +1% speed);
+; ver.03 by spke (27/08/2019, 236(+2) bytes, +1% speed using partly expanded LDIR);
+; ver.04 by spke (spring 2020, added full revision history and support for long offsets)
+;
+; The data must be compressed using any compressor for ApLib capable of generating raw data.
+; At present, the two best available compressors are:
+;
+; "APC" by Sven-Ake Dahl: https://github.com/svendahl/cap or
+; "apultra" by Emmanuel Marty: https://github.com/emmanuel-marty/apultra
+;
+; The compression can be done as follows:
+;
+; apc.exe e <sourcefile> <outfile>
+; or
+; apultra.exe <sourcefile> <outfile>
+;
+; A decent compressor was written by r57shell (although it is worse than compressors above):
+; http://gendev.spritesmind.net/forum/viewtopic.php?p=32548#p32548
+; The use of the official ApLib compressor by Joergen Ibsen is not recommended.
+;
+; The decompression is done in the standard way:
+;
+; ld hl,FirstByteOfCompressedData
+; ld de,FirstByteOfMemoryForDecompressedData
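+; call DecompressApLib ; NB: in this file the code is the rasm macro ApUnpack - expand it under a label such as DecompressApLib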
+;
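+; The decompressor modifies AF, AF', BC, DE, HL and IY (IXH is listed in the original notes, but the macro in this file never touches it).
+; (Note that no "AllowSelfmodifyingCode" option is defined in this file, so the dependency on IY always applies.)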
+;
+; Of course, ApLib compression algorithms are (c) 1998-2014 Joergen Ibsen,
+; see http://www.ibsensoftware.com/ for more information
+;
+; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must not
+; claim that you wrote the original software. If you use this software
+; in a product, an acknowledgment in the product documentation would be
+; appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must not be
+; misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source distribution.
+
+; DEFINE SupportLongOffsets ; +4 bytes for long offset support. slows decompression down by 1%, but may be needed to decompress files >=32K
+
+MACRO ApUnpack
+; Body of the ApLib unpacker: HL = packed source, DE = destination, A = bit buffer, IY = last match offset; it leaves through the "ret z" at the EOF marker.
+ ld a,128 : jr @LWM0_CASE0
+
+;==================================================================================================================
+;==================================================================================================================
+;==================================================================================================================
+
+@LWM0: ;LWM = 0 (LWM stands for "Last Was Match"; a flag that we did not have a match)
+
+@LWM0_ReloadByteC0 ld a,(hl) : inc hl : rla
+ jr c,@LWM0_Check2ndBit
+
+;
+; case "0"+BYTE: copy a single literal
+
+@LWM0_CASE0: ldi ; first byte is always copied as literal
+
+;
+; main decompressor loop
+
+@LWM0_MainLoop: add a : jr z,@LWM0_ReloadByteC0 : jr nc,@LWM0_CASE0 ; "0"+BYTE = copy literal
+@LWM0_Check2ndBit add a : call z,@ReloadByte : jr nc,@LWM0_CASE10 ; "10"+gamma(offset/256)+BYTE+gamma(length) = the main matching mechanism
+ add a : call z,@ReloadByte : jp c,@LWM1_CASE111 ; "110"+[oooooool] = matched 2-3 bytes with a small offset
+
+;
+; branch "110"+[oooooool]: copy two or three bytes (bit "l") with the offset -1..-127 (bits "ooooooo"), or stop
+
+@LWM0_CASE110: ; "use 7 bit offset, length = 2 or 3"
+ ; "if a zero is found here, it's EOF"
+ ld c,(hl) : rr c : ret z ; process EOF
+ inc hl
+ ld b,0
+
+ ld iyl,c : ld iyh,b ; save offset for future LWMs
+
+ push hl ; save src
+ ld h,d : ld l,e ; HL = dest
+ jr c,@LWM0_LengthIs3
+
+@LWM0_LengthIs2 sbc hl,bc ; carry is clear here (the JR C above was not taken)
+ ldi : ldi
+ jr @LWM0_PreMainLoop
+
+@LWM0_LengthIs3 or a : sbc hl,bc ; OR A clears the carry before the subtraction
+ ldi : ldi : ldi
+ jr @LWM0_PreMainLoop
+
+;
+; branch "10"+gamma(offset/256)+BYTE+gamma(length): the main matching mechanism
+
+@LWM0_CASE10: ; "use a gamma code * 256 for offset, another gamma code for length"
+ call @GetGammaCoded
+
+ ; the original decompressor contains
+ ;
+ ; if ((LWM == 0) && (offs == 2)) { ... }
+ ; else {
+ ; if (LWM == 0) { offs -= 3; }
+ ; else { offs -= 2; }
+ ; }
+ ;
+ ; so, the idea here is to use the fact that GetGammaCoded returns (offset/256)+2,
+ ; and to split the first condition by noticing that C-1 can never be zero
+ dec c : dec c : jr z,@LWM1_KickInLWM
+
+@LWM0_AfterLWM dec c : ld b,c : ld c,(hl) : inc hl ; BC = offset
+
+ ld iyl,c : ld iyh,b : push bc
+
+ call @GetGammaCoded ; BC = len*
+
+ ex (sp),hl ; HL = offset, (SP) = src
+
+ ; interpretation of length value is offset-dependent:
+ ; if (offs >= 32000) len++; if (offs >= 1280) len++; if (offs < 128) len+=2;
+ ; in other words,
+ ; (1 <= offs < 128) +=2
+ ; (128 <= offs < 1280) +=0
+ ; (1280 <= offs < 32000) +=1
+ ; NB offsets of 32000 and over need one more check, but other Z80 decompressors seem to ignore it. is it not needed?
+
+ ; interpretation of length value is offset-dependent
+ exa : ld a,h ; bit buffer parked in AF'; A = high byte of the offset
+ IFDEF SupportLongOffsets
+ ; NB offsets over 32000 require an additional check, which is skipped in most
+ ; Z80 decompressors (seemingly as a performance optimization)
+ cp 32000>>8 : jr nc,@LWM0_Add2
+ ENDIF
+ cp 5 : jr nc,@LWM0_Add1
+ or a : jr nz,@LWM0_Add0
+ bit 7,l : jr nz,@LWM0_Add0
+@LWM0_Add2 inc bc
+@LWM0_Add1 inc bc
+@LWM0_Add0 ; for offs<128 : 4+4+7+7 + 4+7 + 8+7 + 6+6 = 60t
+ ; for offs>=1280 : 4+4+7+12 + 6 = 33t
+ ; for 128<=offs<1280 : 4+4+7+7 + 4+12 = 38t OR 4+4+7+7 + 4+7+8+12 = 53t
+; dec bc
+
+@LWM0_CopyMatch: ; this assumes that BC = len, DE = dest, HL = offset
+ ; and also that (SP) = src, while having NC
+ ld a,e : sub l : ld l,a ; HL = DE - HL = dest - offset, low byte first
+ ld a,d : sbc h
+@LWM0_CopyMatchLDH ld h,a : ldi : ldir : exa ; copy the match, then bring the bit buffer back from AF'
+@LWM0_PreMainLoop pop hl ; recover src
+
+;==================================================================================================================
+;==================================================================================================================
+;==================================================================================================================
+
+@LWM1: ; LWM = 1
+
+;
+; main decompressor loop
+
+@LWM1_MainLoop: add a : jr z,@LWM1_ReloadByteC0 : jr nc,@LWM0_CASE0 ; "0"+BYTE = copy literal
+@LWM1_Check2ndBit add a : call z,@ReloadByte : jr nc,@LWM1_CASE10 ; "10"+gamma(offset/256)+BYTE+gamma(length) = the main matching mechanism
+ add a : call z,@ReloadByte : jr nc,@LWM0_CASE110 ; "110"+[oooooool] = matched 2-3 bytes with a small offset
+
+;
+; case "111"+"oooo": copy a byte with offset -1..-15, or write zero to dest
+
+@LWM1_CASE111: ld bc,%11100000 ; B = 0; the three 1s fall out of C during the four RL C below, leaving the 4-bit offset and NC
+ add a : call z,@ReloadByte : rl c ; read short offset (4 bits)
+ add a : call z,@ReloadByte : rl c ; read short offset (4 bits)
+ add a : call z,@ReloadByte : rl c ; read short offset (4 bits)
+ add a : call z,@ReloadByte : rl c ; read short offset (4 bits)
+ ex de,hl : jr z,@LWM1_WriteZero ; zero offset means "write zero" (NB: B is zero here)
+
+ ; "write a previous byte (1-15 away from dest)"
+ push hl ; BC = offset, DE = src, HL = dest
+ sbc hl,bc ; HL = dest-offset (SBC works because branching above ensured NC)
+ ld b,(hl)
+ pop hl
+
+@LWM1_WriteZero ld (hl),b : ex de,hl
+ inc de : jp @LWM0_MainLoop ; 10+4*(4+10+8)+4+7 + 11+15+7+10 + 7+4+6+10 = 179t
+
+@LWM1_ReloadByteC0 ld a,(hl) : inc hl : rla
+ jp nc,@LWM0_CASE0
+ jr @LWM1_Check2ndBit
+
+;
+; branch "10"+gamma(offset/256)+BYTE+gamma(length): the main matching mechanism
+
+@LWM1_CASE10: ; "use a gamma code * 256 for offset, another gamma code for length"
+ call @GetGammaCoded
+
+ ; the original decompressor contains
+ ;
+ ; if ((LWM == 0) && (offs == 2)) { ... }
+ ; else {
+ ; if (LWM == 0) { offs -= 3; }
+ ; else { offs -= 2; }
+ ; }
+ ;
+ ; so, the idea here is to use the fact that GetGammaCoded returns (offset/256)+2,
+ ; and to split the first condition by noticing that C-1 can never be zero
+ dec c : jp @LWM0_AfterLWM
+
+;
+; the re-use of the previous offset (LWM magic)
+
+@LWM1_KickInLWM: ; "and a new gamma code for length"
+ call @GetGammaCoded ; BC = len
+ push hl
+ exa : ld a,e : sub iyl : ld l,a ; HL = DE - IY = dest - saved offset, low byte first
+ ld a,d : sbc iyh
+ jp @LWM0_CopyMatchLDH
+
+;==================================================================================================================
+;==================================================================================================================
+;==================================================================================================================
+
+;
+; interlaced gamma code reader
+; x0 -> 1x
+; x1y0 -> 1xy
+; x1y1z0 -> 1xyz etc
+; (technically, this is a 2-based variation of Exp-Golomb-1)
+
+@GetGammaCoded: ld bc,1
+@ReadGamma add a : jr z,@ReloadByteRG1
+ rl c : rl b
+ add a : jr z,@ReloadByteRG2
+ jr c,@ReadGamma : ret
+
+@ReloadByteRG1 ld a,(hl) : inc hl : rla
+ rl c : rl b
+ add a : jr c,@ReadGamma : ret
+
+@ReloadByteRG2 ld a,(hl) : inc hl : rla
+ jr c,@ReadGamma : ret
+
+;
+; pretty usual getbit for mixed datastreams
+
+@ReloadByte: ld a,(hl) : inc hl : rla : ret
+
+MEND
+
diff --git a/tools/rasm/decrunch/unlzsa1_fast.asm b/tools/rasm/decrunch/unlzsa1_fast.asm
new file mode 100644
index 0000000..0e2617f
--- /dev/null
+++ b/tools/rasm/decrunch/unlzsa1_fast.asm
@@ -0,0 +1,204 @@
+;
+; Speed-optimized LZSA1 decompressor by spke & uniabis (109 bytes)
+;
+; ver.00 by spke for LZSA 0.5.4 (03-24/04/2019, 134 bytes);
+; ver.01 by spke for LZSA 0.5.6 (25/04/2019, 110(-24) bytes, +0.2% speed);
+; ver.02 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
+; ver.03 by uniabis (30/07/2019, 109(-1) bytes, +3.5% speed);
+; ver.04 by spke (31/07/2019, small re-organization of macros);
+; ver.05 by uniabis (22/08/2019, 107(-2) bytes, same speed);
+; ver.06 by spke for LZSA 1.0.7 (27/08/2019, 111(+4) bytes, +2.1% speed);
+; ver.07 by spke for LZSA 1.1.0 (25/09/2019, added full revision history);
+; ver.08 by spke for LZSA 1.1.2 (22/10/2019, re-organized macros and added an option for unrolled copying of long matches);
+; ver.09 by spke for LZSA 1.2.1 (02/01/2020, 109(-2) bytes, same speed)
+;
+; The data must be compressed using the command line compressor by Emmanuel Marty
+; The compression is done as follows:
+;
+; lzsa.exe -f1 -r <sourcefile> <outfile>
+;
+; where option -r asks for the generation of raw (frame-less) data.
+;
+; The decompression is done in the standard way:
+;
+; ld hl,FirstByteOfCompressedData
+; ld de,FirstByteOfMemoryForDecompressedData
+; call DecompressLZSA1
+;
+; Backward compression is also supported; you can compress files backward using:
+;
+; lzsa.exe -f1 -r -b <sourcefile> <outfile>
+;
+; and decompress the resulting files using:
+;
+; ld hl,LastByteOfCompressedData
+; ld de,LastByteOfMemoryForDecompressedData
+; call DecompressLZSA1
+;
+; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
+;
+; Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
+; see https://github.com/emmanuel-marty/lzsa for more information
+;
+; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must not
+; claim that you wrote the original software. If you use this software
+; in a product, an acknowledgment in the product documentation would be
+; appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must not be
+; misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source distribution.
+
+; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+57 bytes)
+; DEFINE BACKWARD_DECOMPRESS
+
+ IFNDEF BACKWARD_DECOMPRESS
+ ; forward variant of the four direction helpers: pointers move upwards
+ MACRO NEXT_HL
+ inc hl
+ ENDM
+ ; swaps DE and HL, then HL = HL + DE; with DE = offset and HL = dest on entry this gives DE = dest, HL = dest + offset
+ MACRO ADD_OFFSET
+ ex de,hl : add hl,de
+ ENDM
+
+ MACRO COPY1
+ ldi ; copy one byte (HL) -> (DE), HL and DE +1, BC -1
+ ENDM
+
+ MACRO COPYBC
+ ldir ; repeat LDI until BC = 0
+ ENDM
+
+ ELSE
+ ; backward variant of the same helpers: pointers move downwards
+ MACRO NEXT_HL
+ dec hl
+ ENDM
+ ; swaps DE and HL, then HL = DE - HL; with DE = offset and HL = dest on entry this gives DE = dest, HL = dest - offset; clobbers A
+ MACRO ADD_OFFSET
+ ex de,hl : ld a,e : sub l : ld l,a
+ ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
+ ENDM
+
+ MACRO COPY1
+ ldd ; copy one byte (HL) -> (DE), HL and DE -1, BC -1
+ ENDM
+
+ MACRO COPYBC
+ lddr ; repeat LDD until BC = 0
+ ENDM
+
+ ENDIF
+; DecompressLZSA1: body of the LZSA1 decompressor. In: HL -> compressed data, DE -> output (first bytes, or last bytes with BACKWARD_DECOMPRESS). Ends with RET at the end-of-data marker. Uses AF, AF', BC, DE, HL.
+macro DecompressLZSA1
+ ld b,0 : jr @ReadToken ; B stays 0 between copies, so loading C alone is enough for short lengths
+
+@NoLiterals: xor (hl) : NEXT_HL : jp m,@LongOffset ; A was 0 here, so A = token; sign flag = O bit (two-byte offset)
+
+@ShortOffset: push de : ld e,(hl) : ld d,#FF ; save dest; DE = #FF00 + offset byte
+
+ ; short matches have length 0+3..14+3
+ add 3 : cp 15+3 : jr nc,@LongerMatch ; A = MMMM+3; MMMM = 15 needs extra length bytes
+
+ ; placed here this saves a JP per iteration
+@CopyMatch: ld c,a
+.UseC NEXT_HL : ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
+ ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
+ COPY1 : COPY1 : COPYBC ; BC = 0, DE = dest
+.popSrc pop hl ; HL = src
+
+@ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream,
+ ; where LLL is the number of literals and MMMM is
+ ; a length of the match that follows after the literals
+ ld a,(hl) : and #70 : jr z,@NoLiterals ; A = LLL field in place (bits 4-6)
+
+ cp #70 : jr z,@MoreLiterals ; LLL=7 means 7+ literals...
+ rrca : rrca : rrca : rrca : ld c,a ; LLL<7 means 0..6 literals...
+
+ ld a,(hl) : NEXT_HL ; A = token again, HL -> first literal
+ COPYBC ; copy C literals (B = 0)
+
+ ; the top bit of token is set if the offset contains two bytes
+ and #8F : jp p,@ShortOffset ; keep only O and MMMM; sign flag = O
+
+@LongOffset: ; read second byte of the offset
+ push de : ld e,(hl) : NEXT_HL : ld d,(hl) ; save dest; DE = 16-bit offset, low byte first
+ add -128+3 : cp 15+3 : jp c,@CopyMatch ; strip the O bit and add 3: A = MMMM+3
+
+ IFNDEF UNROLL_LONG_MATCHES
+
+ ; MMMM=15 indicates a multi-byte match length
+@LongerMatch: NEXT_HL : add (hl) : jr nc,@CopyMatch ; A = 18 + extra byte; no carry = final length
+
+ ; the codes are designed to overflow;
+ ; the overflow value 1 means read 1 extra byte
+ ; and overflow value 0 means read 2 extra bytes
+.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,@CopyMatch.UseC ; Z flag still comes from ADD (HL) above
+.code0 NEXT_HL : ld b,(hl) ; BC = 16-bit length, low byte first
+
+ ; the two-byte match length equal to zero
+ ; designates the end-of-data marker
+ ld a,b : or c : jr nz,@CopyMatch.UseC
+ pop de : ret ; take the saved dest off the stack into DE and leave
+
+ ELSE
+
+ ; MMMM=15 indicates a multi-byte match length
+@LongerMatch: NEXT_HL : add (hl) : jr c,@VeryLongMatch ; A = 18 + extra byte; carry = longer encodings
+
+ ld c,a
+.UseC NEXT_HL : ex (sp),hl ; same register shuffle as in @CopyMatch
+ ADD_OFFSET
+ COPY1 : COPY1
+
+ ; this is an unrolled equivalent of LDIR
+ xor a : sub c ; A = -C
+ and 32-1 : add a ; A = 2 * ((-C) mod 32): byte size of the COPY1s to skip (2 bytes each)
+ ld (.jrOffset),a : jr nz,$+2 ; self-modifying: A is stored into the displacement byte of this very JR
+.jrOffset EQU $-1
+.fastLDIR repeat 32
+ COPY1
+ rend
+ jp pe,.fastLDIR ; P/V left by the last copy = BC not yet 0, so run the whole block again
+ jp @CopyMatch.popSrc
+
+@VeryLongMatch: ; the codes are designed to overflow;
+ ; the overflow value 1 means read 1 extra byte
+ ; and overflow value 0 means read 2 extra bytes
+.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,@LongerMatch.UseC ; Z flag still comes from ADD (HL) above
+.code0 NEXT_HL : ld b,(hl) ; BC = 16-bit length, low byte first
+
+ ; the two-byte match length equal to zero
+ ; designates the end-of-data marker
+ ld a,b : or c : jr nz,@LongerMatch.UseC
+ pop de : ret ; take the saved dest off the stack into DE and leave
+
+ ENDIF
+
+@MoreLiterals: ; there are three possible situations here
+ xor (hl) : NEXT_HL : exa ; A = #70 xor token = O|000|MMMM, parked with its flags in AF'
+ ld a,7 : add (hl) : jr c,@ManyLiterals ; A = 7 + extra byte; carry = longer encodings
+
+@CopyLiterals: ld c,a
+.UseC NEXT_HL : COPYBC
+
+ exa : jp p,@ShortOffset : jr @LongOffset ; bring back the parked token; its sign flag is the O bit
+
+@ManyLiterals:
+.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,@CopyLiterals.UseC ; sum wrapped to 1: B = 1, C = next byte
+.code0 NEXT_HL : ld b,(hl) : jr @CopyLiterals.UseC ; sum wrapped to 0: BC = 16-bit count, low byte first
+
+mend
+
+
+
diff --git a/tools/rasm/decrunch/unlzsa2_fast.asm b/tools/rasm/decrunch/unlzsa2_fast.asm
new file mode 100755
index 0000000..8c6b5b1
--- /dev/null
+++ b/tools/rasm/decrunch/unlzsa2_fast.asm
@@ -0,0 +1,189 @@
+;
+; Speed-optimized LZSA2 decompressor by spke & uniabis (216 bytes)
+;
+
+ DEFINE UNROLL_LONG_MATCHES ; enabled here: faster decompression of very compressible data (+38 bytes). NOTE(review): no IFDEF in DecompressLZSA2 below tests this symbol
+; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
+
+ IFNDEF BACKWARD_DECOMPRESS
+ ; forward variant of the four direction helpers: pointers move upwards
+ MACRO NEXT_HL
+ inc hl
+ ENDM
+ ; swaps DE and HL, then HL = HL + DE; with DE = offset and HL = dest on entry this gives DE = dest, HL = dest + offset
+ MACRO ADD_OFFSET
+ ex de,hl : add hl,de
+ ENDM
+
+ MACRO COPY1
+ ldi ; copy one byte (HL) -> (DE), HL and DE +1, BC -1
+ ENDM
+
+ MACRO COPYBC
+ ldir ; repeat LDI until BC = 0
+ ENDM
+
+ ELSE
+ ; backward variant of the same helpers: pointers move downwards
+ MACRO NEXT_HL
+ dec hl
+ ENDM
+ ; swaps DE and HL, then HL = DE - HL; with DE = offset and HL = dest on entry this gives DE = dest, HL = dest - offset; clobbers A
+ MACRO ADD_OFFSET
+ ex de,hl : ld a,e : sub l : ld l,a
+ ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
+ ENDM
+
+ MACRO COPY1
+ ldd ; copy one byte (HL) -> (DE), HL and DE -1, BC -1
+ ENDM
+
+ MACRO COPYBC
+ lddr ; repeat LDD until BC = 0
+ ENDM
+
+ ENDIF
+
+; DecompressLZSA2: body of the LZSA2 decompressor. In: HL -> compressed data, DE -> output. Ends with RET at the end-of-data code. Uses AF, AF', BC, DE, HL and IX (last match offset).
+macro DecompressLZSA2
+@lzsa2
+ ; A' stores next nibble as %1111.... or assumed to contain trash
+ ; B is assumed to be 0
+ ld b,0 : scf : exa : jr .ReadToken ; carry set in AF' = no nibble buffered yet
+
+.ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,.CopyLiterals ; count = 18 + extra byte unless the sum overflows
+ ld c,(hl) : NEXT_HL ; overflow: a 16-bit count follows, low byte first
+ ld a,b : ld b,(hl) ; A = token (it was parked in B), B = high byte of the count
+ jr .NEXTHLuseBC
+
+
+.MoreLiterals: ld b,(hl) : NEXT_HL ; LL = 3: park the token in B while the count is decoded
+ scf : exa : jr nc,.noUpdatemoar ; fetch a nibble: carry clear in AF' = one is already buffered there
+
+ ld a,(hl) : or #F0 : exa ; buffer the low nibble as %1111nnnn in A' (OR leaves carry clear)
+ ld a,(hl) : NEXT_HL : or #0F
+ rrca : rrca : rrca : rrca ; A = %1111hhhh, the high nibble of the same byte
+
+.noUpdatemoar ;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
+ inc a : jr z,.ManyLiterals : sub #F0-3+1 ; nibble 15 = an extra byte follows; otherwise A = nibble + 3
+
+.CopyLiterals: ld c,a : ld a,b : ld b,0 ; C = count, A = token, B = 0 again
+ COPYBC
+ push de : or a : jp p,.CASE0xx ;: jr CASE1xx
+
+ cp %11000000 : jr c,.CASE10x
+
+.CASE11x cp %11100000 : jr c,.CASE110
+
+ ; "111": repeated offset
+.CASE111: ld de,ix : jr .MatchLen
+
+
+.Literals0011: jr nz,.MoreLiterals ; Z comes from the AND in .ReadToken: LL = 0 falls through, LL = 3 jumps
+
+ ; if "LL" of the byte token is equal to 0,
+ ; there are no literals to copy
+.NoLiterals: or (hl) : NEXT_HL ; A was 0, so A = token; sign flag = top bit of the token
+ push de : jp m,.CASE1xx
+
+ ; short (5 or 9 bit long) offsets
+.CASE0xx ld d,#FF : cp %01000000 : jr c,.CASE00x
+
+ ; "01x": the case of the 9-bit offset
+.CASE01x: cp %01100000 : rl d ; D = #FE + carry; carry is set when bit 5 of the token is 0
+
+.ReadOffsetE ld e,(hl) : NEXT_HL
+
+.SaveOffset: LD ix,de ; remember the offset for a later "111" token
+
+.MatchLen: inc a : and %00000111 : jr z,.LongerMatch : inc a ; MMM = 7 needs extra length data; otherwise A = MMM + 2
+
+.CopyMatch: ld c,a
+;.useC
+ ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
+ ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
+ COPY1
+ COPYBC
+.popSrc pop hl
+
+ ; compressed data stream contains records
+ ; each record begins with the byte token "XYZ|LL|MMM"
+.ReadToken: ld a,(hl) : and %00011000 : jp pe,.Literals0011 ; process the cases 00 and 11 separately
+
+ rrca : rrca : rrca ; A = LL, which is 1 or 2 on this path
+
+ ld c,a : ld a,(hl) ; token is re-read for further processing
+.NEXTHLuseBC NEXT_HL
+ COPYBC
+
+ ; the token and literals are followed by the offset
+ push de : or a : jp p,.CASE0xx
+
+.CASE1xx cp %11000000 : jr nc,.CASE11x
+
+ ; "10x": the case of the 13-bit offset
+.CASE10x: ld c,a : exa : jr nc,.noUpdatecase10x ; token saved in C; fetch a nibble (the CP that led here left carry set for AF')
+
+ ld a,(hl) : or #F0 : exa ; buffer the low nibble as %1111nnnn in A'
+ ld a,(hl) : NEXT_HL : or #0F
+ rrca : rrca : rrca : rrca ; A = %1111hhhh
+
+.noUpdatecase10x ld d,a : ld a,c
+ cp %10100000 : dec d : rl d : jr .ReadOffsetE ; D = 2*(D-1) + carry (8-bit); carry is set when bit 5 of the token is 0
+
+
+
+ ; "110": 16-bit offset
+.CASE110: ld d,(hl) : NEXT_HL : jr .ReadOffsetE ; high byte here, low byte is read at .ReadOffsetE
+
+
+
+
+ ; "00x": the case of the 5-bit offset
+.CASE00x: ld c,a : exa : jr nc,.noUpdatecase00x ; token saved in C; fetch a nibble (the CP that led here left carry set for AF')
+
+ ld a,(hl) : or #F0 : exa ; buffer the low nibble as %1111nnnn in A'
+ ld a,(hl) : NEXT_HL : or #0F
+ rrca : rrca : rrca : rrca ; A = %1111hhhh
+
+.noUpdatecase00x ld e,a : ld a,c
+ cp %00100000 : rl e : jp .SaveOffset ; E = %111nnnnc; carry c is set when bit 5 of the token is 0; D is still #FF
+
+
+.LongerMatch: scf : exa : jr nc,.noUpdatelongermatch ; fetch a nibble, marking AF' empty if the buffered one is used
+
+ ld a,(hl) : or #F0 : exa ; buffer the low nibble as %1111nnnn in A'
+ ld a,(hl) : NEXT_HL : or #0F
+ rrca : rrca : rrca : rrca ; A = %1111hhhh
+
+.noUpdatelongermatch sub #F0-9 : cp 15+9 : jr c,.CopyMatch ; A = nibble + 9; nibble 15 needs an extra byte
+
+
+.LongMatch: add (hl) : NEXT_HL : jr c,.VeryLongMatch ; A = 24 + extra byte; carry = longer encodings
+
+ ld c,a
+.useC ex (sp),hl ; same register shuffle as in .CopyMatch
+ ADD_OFFSET
+ COPY1
+
+ ; this is an unrolled equivalent of LDIR
+ xor a : sub c ; A = -C
+ and 32-1 : add a ; A = 2 * ((-C) mod 32): byte size of the COPY1s to skip (2 bytes each)
+ ld (.jrOffset),a : jr nz,$+2 ; self-modifying: A is stored into the displacement byte of this very JR
+.jrOffset EQU $-1
+.fastLDIR repeat 32
+ COPY1
+ rend
+ jp pe,.fastLDIR ; P/V left by the last copy = BC not yet 0, so run the whole block again
+ jp .popSrc
+
+.VeryLongMatch: ld c,(hl) : NEXT_HL ; BC = next two stream bytes, low byte first
+ ld b,(hl) : NEXT_HL : jr nz,.useC ; Z still comes from ADD (HL) in .LongMatch: a sum of exactly 0 means end of data
+ pop de : ret ; take the saved dest off the stack into DE and leave
+
+mend
+
+
+
+
+