summaryrefslogtreecommitdiff
path: root/tools/apultra/asm/8088/aplib_8088_fast.S
diff options
context:
space:
mode:
authorJuan J. Martinez <jjm@usebox.net>2021-01-09 09:01:05 +0000
committerJuan J. Martinez <jjm@usebox.net>2021-01-09 09:01:05 +0000
commit9bcf1e97960c0da7322a868efdbc07e2650716fe (patch)
treede6d32ad5b0e567991bd3eb262902c15a77074d9 /tools/apultra/asm/8088/aplib_8088_fast.S
parent3b31adf01305e522f7e28c1435fb47418ce43267 (diff)
downloadubox-msx-lib-9bcf1e97960c0da7322a868efdbc07e2650716fe.tar.gz
ubox-msx-lib-9bcf1e97960c0da7322a868efdbc07e2650716fe.zip
Extra libs: ap.lib
aPLib support with apultra.
Diffstat (limited to 'tools/apultra/asm/8088/aplib_8088_fast.S')
-rw-r--r--tools/apultra/asm/8088/aplib_8088_fast.S178
1 files changed, 178 insertions, 0 deletions
diff --git a/tools/apultra/asm/8088/aplib_8088_fast.S b/tools/apultra/asm/8088/aplib_8088_fast.S
new file mode 100644
index 0000000..c535234
--- /dev/null
+++ b/tools/apultra/asm/8088/aplib_8088_fast.S
@@ -0,0 +1,178 @@
+; aplib_8088_fast.S - speed-optimized aPLib decompressor for 8088 - 188 bytes
+;
+; Copyright (C) 2019 Emmanuel Marty
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must not
+; claim that you wrote the original software. If you use this software
+; in a product, an acknowledgment in the product documentation would be
+; appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must not be
+; misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source distribution.
+
+ segment .text
+ bits 16
+
+; ---------------------------------------------------------------------------
+; Decompress aPLib data
+; inputs:
+; * ds:si: compressed aPLib data
+; * es:di: output buffer
+; output:
+; * ax: decompressed size
+; ---------------------------------------------------------------------------
+
+%macro apl_get_bit 0 ; read bit into carry
+ add al,al ; shift bit queue, and high bit into carry
+ jnz %%gotbit ; queue not empty, bits remain
+ lodsb ; read 8 new bits
+ adc al,al ; shift bit queue, and high bit into carry
+%%gotbit:
+%endmacro
+
+apl_decompress:
+ push di ; remember decompression offset
+ cld ; make string operations go forward
+
+ ; === register map ===
+ ; al: bit queue
+ ; ah: unused, but value is trashed
+ ; bx: follows_literal
+ ; cx: scratch register for reading gamma2 codes and storing copy length
+ ; dx: match offset (and rep-offset)
+ ; si: input (compressed data) pointer
+ ; di: output (decompressed data) pointer
+ ; bp: temporary value, trashed
+
+ mov al,080H ; clear bit queue(al) and set high bit to move into carry
+ xor dx,dx ; invalidate rep offset
+
+.literal:
+ movsb ; read and write literal byte
+.next_command_after_literal:
+ mov bx,03H ; set follows_literal(bx) to 3
+
+.next_command:
+ apl_get_bit ; read 'literal or match' bit
+ jnc .literal ; if 0: literal
+
+ ; 1x: match
+
+ apl_get_bit ; read '8+n bits or other type' bit
+ jc .other ; 11x: other type of match
+
+ ; 10: 8+n bits match
+ call .get_gamma2 ; read gamma2-coded high offset bits
+ sub cx,bx ; high offset bits == 2 when follows_literal == 3 ?
+ ; (a gamma2 value is always >= 2, so substracting follows_literal when it
+ ; is == 2 will never result in zero)
+ jae .not_repmatch ; if not, not a rep-match
+
+ call .get_gamma2 ; read match length
+ jmp short .got_len ; go copy
+
+.not_repmatch:
+ mov dh,cl ; transfer high offset bits to dh
+ mov dl,[si] ; read low offset byte in dl
+ inc si
+
+ call .get_gamma2 ; read match length
+ cmp dh,07DH ; offset >= 32000 ?
+ jae .increase_len_by2 ; if so, increase match len by 2
+ cmp dh,05H ; offset >= 1280 ?
+ jae .increase_len_by1 ; if so, increase match len by 1
+ cmp dx,0080H ; offset < 128 ?
+ jae .got_len ; if so, increase match len by 2, otherwise it would be a 7+1 copy
+.increase_len_by2:
+ inc cx ; increase length
+.increase_len_by1:
+ inc cx ; increase length
+
+ ; copy cx bytes from match offset dx
+
+.got_len:
+ push ds ; save ds:si (current pointer to compressed data)
+ mov bp,si
+
+ push es
+ pop ds
+ mov si,di ; point to destination in es:di - offset in dx
+ sub si,dx
+ rep movsb ; copy matched bytes
+
+ mov si,bp ; restore ds:si
+ pop ds
+
+ mov bl,02H ; set follows_literal to 2 (bx is unmodified by match commands)
+ jmp short .next_command
+
+ ; read gamma2-coded value into cx
+
+.get_gamma2:
+ xor cx,cx ; initialize to 1 so that value will start at 2
+ inc cx ; when shifted left in the adc below
+
+.gamma2_loop:
+ apl_get_bit ; read data bit
+ adc cx,cx ; shift into cx
+ apl_get_bit ; read continuation bit
+ jc .gamma2_loop ; loop until a zero continuation bit is read
+
+ ret
+
+ ; handle 7 bits offset + 1 bit len or 4 bits offset / 1 byte copy
+
+.other:
+ xor cx,cx
+ apl_get_bit ; read '7+1 match or short literal' bit
+ jc .short_literal ; 111: 4 bit offset for 1-byte copy
+
+ ; 110: 7 bits offset + 1 bit length
+
+ mov dl,[si] ; read offset + length in dl
+ inc si
+
+ inc cx ; prepare cx for length below
+ shr dl,1 ; shift len bit into carry, and offset in place
+ je .done ; if zero offset: EOD
+ adc cx,cx ; len in cx: 1*2 + carry bit = 2 or 3
+
+ xor dh,dh ; clear high bits of offset
+ jmp short .got_len
+
+ ; 4 bits offset / 1 byte copy
+
+.short_literal:
+ apl_get_bit ; read 4 offset bits
+ adc cl,cl
+ apl_get_bit
+ adc cl,cl
+ apl_get_bit
+ adc cl,cl
+ apl_get_bit
+ adc cl,cl
+ xchg ax,cx ; preserve bit queue in cx, put offset in ax
+ jz .write_zero ; if offset is 0, write a zero byte
+
+ ; short offset 1-15
+ mov bx,di ; point to destination in es:di - offset in ax
+ sub bx,ax ; we trash bx, it will be reset to 3 when we loop
+ mov al,[es:bx] ; read byte from short offset
+.write_zero:
+ stosb ; copy matched byte
+ mov ax,cx ; restore bit queue in al
+ jmp .next_command_after_literal
+
+.done:
+ pop ax ; retrieve the original decompression offset
+ xchg di,ax ; compute decompressed size
+ sub ax,di
+ ret