author     Juan J. Martinez <jjm@usebox.net>   2023-11-05 11:22:55 +0000
committer  Juan J. Martinez <jjm@usebox.net>   2023-11-05 11:31:28 +0000
commit     2fbdf974338bde8576efdae40a819a76b2391033 (patch)
tree       64d41a37470143f142344f9a439d96de3e7918c2 /tools/apultra_src/asm
Initial import of the open source release
Diffstat (limited to 'tools/apultra_src/asm')
-rw-r--r--   tools/apultra_src/asm/6502/aplib_6502.asm    371
-rw-r--r--   tools/apultra_src/asm/ARM7TDMI/aplib_arm.s   150
-rw-r--r--   tools/apultra_src/asm/Z80/aplib_z80.asm      190
3 files changed, 711 insertions, 0 deletions
diff --git a/tools/apultra_src/asm/6502/aplib_6502.asm b/tools/apultra_src/asm/6502/aplib_6502.asm
new file mode 100644
index 0000000..219ac86
--- /dev/null
+++ b/tools/apultra_src/asm/6502/aplib_6502.asm
@@ -0,0 +1,371 @@
+; ***************************************************************************
+; ***************************************************************************
+;
+; aplib_6502.s
+;
+; NMOS 6502 decompressor for data stored in Jorgen Ibsen's aPLib format.
+;
+; Includes support for Emmanuel Marty's enhancements to the aPLib format.
+;
+; The code is 252 bytes long for standard format, 270 for enhanced format.
+;
+; This code is written for the ACME assembler.
+;
+; Copyright John Brandwood 2019.
+;
+; Distributed under the Boost Software License, Version 1.0.
+; (See accompanying file LICENSE_1_0.txt or copy at
+;  http://www.boost.org/LICENSE_1_0.txt)
+;
+; ***************************************************************************
+; ***************************************************************************
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; Decompression Options & Macros
+;
+
+        ;
+        ; Use the enhanced format from Emmanuel Marty's APULTRA?
+        ;
+        ; The enhancements speed up decompression on an 8-bit CPU.
+        ;
+        ; This gives an 11% improvement in decompression speed, but
+        ; breaks compatibility with standard aPLib encoders.
+        ;
+
+APL_ENHANCED    =       0
+
+        ;
+        ; Assume that we're decompressing from a large multi-bank
+        ; compressed data file, and that the next bank may need to
+        ; be paged in when a page-boundary is crossed.
+        ;
+
+APL_FROM_BANK   =       0
+
+        ;
+        ; Macro to increment the source pointer to the next page.
+        ;
+
+        !if APL_FROM_BANK {
+        !macro APL_INC_PAGE {
+                jsr     .next_page
+        }
+        } else {
+        !macro APL_INC_PAGE {
+                inc     <apl_srcptr + 1
+        }
+        }
+
+        ;
+        ; Macro to read a byte from the compressed source data.
+        ;
+
+        !macro APL_GET_SRC {
+                lda     (apl_srcptr),y
+                inc     <apl_srcptr + 0
+                bne     .skip
+                +APL_INC_PAGE
+.skip:
+        }
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; Data usage is last 12 bytes of zero-page.
+;
+
+        !if APL_ENHANCED {
+apl_nibflg      =       $F4             ; 1 byte.
+apl_nibble      =       $F5             ; 1 byte.
+apl_egamma      =       $F6             ; 1 byte.
+        }
+
+apl_bitbuf      =       $F7             ; 1 byte.
+apl_offset      =       $F8             ; 1 word.
+apl_winptr      =       $FA             ; 1 word.
+apl_srcptr      =       $FC             ; 1 word.
+apl_dstptr      =       $FE             ; 1 word.
+apl_length      =       apl_winptr
+
+
+
+; ***************************************************************************
+; ***************************************************************************
+;
+; apl_decompress - Decompress data stored in Jorgen Ibsen's aPLib format.
+;
+; Args: apl_srcptr = ptr to compressed data
+; Args: apl_dstptr = ptr to output buffer
+; Uses: lots!
+;
+; If compiled with APL_FROM_BANK, then apl_srcptr should be within the bank
+; window range.
+;
+; As an optimization, the code to handle window offsets > 64768 bytes has
+; been removed, since these don't occur with a 16-bit address range.
+;
+; As an optimization, the code to handle window offsets > 32000 bytes can
+; be commented-out, since these don't occur in typical 8-bit computer usage.
+;
+
+apl_decompress: ldy     #0              ; Initialize source index.
+
+                lda     #$80            ; Initialize an empty
+                sta     <apl_bitbuf     ; bit-buffer.
+
+        !if APL_ENHANCED {
+                sta     <apl_egamma     ; Bit-buffer for gamma pairs.
+                sty     <apl_nibflg     ; Reset the flag.
+        }
+
+        ;
+        ; 0 bbbbbbbb - One byte from compressed data, i.e. a "literal".
+        ;
+
+.literal:       +APL_GET_SRC
+
+.write_byte:    ldx     #0              ; LWM=0.
+
+                sta     (apl_dstptr),y  ; Write the byte directly to
+                inc     <apl_dstptr + 0 ; the output.
+                bne     .next_tag
+                inc     <apl_dstptr + 1
+
+.next_tag:      asl     <apl_bitbuf     ; 0 bbbbbbbb
+                bne     .skip0
+                jsr     .load_bit
+.skip0:         bcc     .literal
+
+.skip1:         asl     <apl_bitbuf     ; 1 0 <offset> <length>
+                bne     .skip2
+                jsr     .load_bit
+.skip2:         bcc     .copy_large
+
+                asl     <apl_bitbuf     ; 1 1 0 dddddddn
+                bne     .skip3
+                jsr     .load_bit
+.skip3:         bcc     .copy_normal
+
+        ; 1 1 1 dddd - Copy 1 byte within 15 bytes (or zero).
+
+        !if APL_ENHANCED {
+
+.copy_short:    lsr     <apl_nibflg     ; Is there a nibble waiting?
+                lda     <apl_nibble     ; Extract the lo-nibble.
+                bcs     .skip4
+
+                inc     <apl_nibflg     ; Reset the flag.
+                +APL_GET_SRC
+                sta     <apl_nibble     ; Preserve for next time.
+                lsr                     ; Extract the hi-nibble.
+                lsr
+                lsr
+                lsr
+
+.skip4:         and     #$0F            ; Current nibble.
+                beq     .write_byte     ; Offset=0 means write zero.
+
+        } else {
+
+.copy_short:    lda     #$10
+.nibble_loop:   asl     <apl_bitbuf
+                bne     .skip4
+                pha
+                jsr     .load_bit
+                pla
+.skip4:         rol
+                bcc     .nibble_loop
+                beq     .write_byte     ; Offset=0 means write zero.
+
+        }
+
+                eor     #$FF            ; Read the byte directly from
+                tay                     ; the destination window.
+                iny
+                dec     <apl_dstptr + 1
+                lda     (apl_dstptr),y
+                inc     <apl_dstptr + 1
+                ldy     #0
+                beq     .write_byte
+
+        ;
+        ; 1 1 0 dddddddn - Copy 2 or 3 within 128 bytes.
+        ;
+
+.copy_normal:   +APL_GET_SRC            ; 1 1 0 dddddddn
+                lsr
+                beq     .finished       ; Offset 0 == EOF.
+
+                sta     <apl_offset + 0 ; Preserve offset.
+                sty     <apl_offset + 1
+                tya                     ; Y == 0.
+                tax                     ; Bits 8..15 of length.
+                adc     #2              ; Bits 0...7 of length.
+                bne     .do_match       ; NZ from previous ADC.
+
+        ;
+        ; Subroutines for byte & bit handling.
+        ;
+
+        !if APL_ENHANCED {
+
+.get_gamma:     lda     #1              ; Get a gamma-coded value.
+.gamma_loop:    asl     <apl_egamma
+                bne     .rotate_gamma
+                pha
+                +APL_GET_SRC            ; Reload an empty bit-buffer
+                rol                     ; from the compressed source.
+                sta     <apl_egamma
+                pla
+.rotate_gamma:  rol
+                bcs     .big_gamma      ; Got 8 bits, now read rest.
+                asl     <apl_egamma
+                bcc     .gamma_loop
+                rts                     ; Always returns CS.
+
+.big_gamma:     pha                     ; Read remaining bits of length
+                tya                     ; larger than 255. This is very
+                jsr     .rotate_gamma   ; rare, so it saves cycles on
+                tax                     ; the 6502 to do it this way.
+                pla
+
+.finished:      rts                     ; All decompressed!
+
+        } else {
+
+.get_gamma:     lda     #1              ; Get a gamma-coded value.
+.gamma_loop:    asl     <apl_bitbuf
+                bne     .skip5
+                pha
+                jsr     .load_bit
+                pla
+.skip5:         rol
+                rol     <apl_length + 1
+                asl     <apl_bitbuf
+                bne     .skip6
+                pha
+                jsr     .load_bit
+                pla
+.skip6:         bcs     .gamma_loop
+
+.finished:      rts                     ; All decompressed!
+
+        }
+
+        ;
+        ; 1 0 <offset> <length> - gamma-coded LZSS pair.
+        ;
+
+        !if APL_ENHANCED {
+
+.copy_large:    jsr     .get_gamma      ; Bits 8..15 of offset (min 2).
+
+                cpx     #1              ; CC if LWM==0, CS if LWM==1.
+                ldx     #0              ; Clear hi-byte of length.
+                sbc     #2              ; -3 if LWM==0, -2 if LWM==1.
+                bcs     .normal_pair    ; CC if LWM==0 && offset==2.
+
+                jsr     .get_gamma      ; Get length (A=lo-byte & CS).
+                bcs     .do_match       ; Use previous Offset.
+
+.normal_pair:   sta     <apl_offset + 1 ; Save bits 8..15 of offset.
+
+                +APL_GET_SRC
+                sta     <apl_offset + 0 ; Save bits 0...7 of offset.
+
+                jsr     .get_gamma      ; Get length (A=lo-byte & CS).
+
+        } else {
+
+.copy_large:    jsr     .get_gamma      ; Bits 8..15 of offset (min 2).
+                sty     <apl_length + 1 ; Clear hi-byte of length.
+
+                cpx     #1              ; CC if LWM==0, CS if LWM==1.
+                sbc     #2              ; -3 if LWM==0, -2 if LWM==1.
+                bcs     .normal_pair    ; CC if LWM==0 && offset==2.
+
+                jsr     .get_gamma      ; Get length (A=lo-byte & CC).
+                ldx     <apl_length + 1
+                bcc     .do_match       ; Use previous Offset.
+
+.normal_pair:   sta     <apl_offset + 1 ; Save bits 8..15 of offset.
+
+                +APL_GET_SRC
+                sta     <apl_offset + 0 ; Save bits 0...7 of offset.
+
+                jsr     .get_gamma      ; Get length (A=lo-byte & CC).
+                ldx     <apl_length + 1
+
+        }
+
+                ldy     <apl_offset + 1 ; If offset < 256.
+                beq     .lt256
+                cpy     #$7D            ; If offset >= 32000, length += 2.
+                bcs     .match_plus2
+                cpy     #$05            ; If offset >= 1280, length += 1.
+                bcs     .match_plus1
+                bcc     .do_match
+.lt256:         ldy     <apl_offset + 0 ; If offset < 128, length += 2.
+                bmi     .do_match
+
+        !if APL_ENHANCED {
+        } else {
+                sec                     ; aPLib gamma returns with CC.
+        }
+
+.match_plus2:   adc     #1              ; CS, so ADC #2.
+                bcs     .match_plus256
+
+.match_plus1:   adc     #0              ; CS, so ADC #1, or CC if fall
+                bcc     .do_match       ; through from .match_plus2.
+
+.match_plus256: inx
+
+.do_match:      eor     #$FF            ; Negate the lo-byte of length
+                tay                     ; and check for zero.
+                iny
+                beq     .calc_addr
+                eor     #$FF
+
+                inx                     ; Increment # of pages to copy.
+
+                clc                     ; Calc destination for partial
+                adc     <apl_dstptr + 0 ; page.
+                sta     <apl_dstptr + 0
+                bcs     .calc_addr
+                dec     <apl_dstptr + 1
+
+.calc_addr:     sec                     ; Calc address of match.
+                lda     <apl_dstptr + 0
+                sbc     <apl_offset + 0
+                sta     <apl_winptr + 0
+                lda     <apl_dstptr + 1
+                sbc     <apl_offset + 1
+                sta     <apl_winptr + 1
+
+.copy_page:     lda     (apl_winptr),y
+                sta     (apl_dstptr),y
+                iny
+                bne     .copy_page
+                inc     <apl_winptr + 1
+                inc     <apl_dstptr + 1
+                dex                     ; Any full pages left to copy?
+                bne     .copy_page
+
+                inx                     ; LWM=1.
+                jmp     .next_tag
+
+        ;
+        ; Subroutines for byte & bit handling.
+        ;
+
+.load_bit:      +APL_GET_SRC            ; Reload an empty bit-buffer
+                rol                     ; from the compressed source.
+                sta     <apl_bitbuf
+                rts
diff --git a/tools/apultra_src/asm/ARM7TDMI/aplib_arm.s b/tools/apultra_src/asm/ARM7TDMI/aplib_arm.s
new file mode 100644
index 0000000..b6d0cef
--- /dev/null
+++ b/tools/apultra_src/asm/ARM7TDMI/aplib_arm.s
@@ -0,0 +1,150 @@
+@APlib ARM7 decompressor by Dan Weiss, based on the original C version
+@Takes in raw apacked data, NOT data created by the 'safe' compressor.
+@Code is from the PocketNES NES Emulator for GBA
+
+@Code is formatted for GNU Assembler
+
+        src         .req    r0
+        dest        .req    r1
+        byte        .req    r2
+        mask        .req    r3
+        gamma       .req    r4
+        lwm         .req    r6
+        recentoff   .req    r7
+        temp        .req    r8
+
+.global depack
+.type depack STT_FUNC
+
+@r0 = src
+@r1 = dest
+@r2 = byte
+@r3 = rotating bit mask
+@r4 = increasing gamma
+@r6 = lwm
+@r7 = recentoff
+@r8 = lr copy/scratch
+
+        .macro GETBIT           @3 instructions
+        movs    mask,mask,ror #1
+        ldrcsb  byte,[src],#1
+        tst     byte,mask
+        .endm
+
+        .macro GETBITGAMMA      @5 instructions
+        mov     gamma,gamma,lsl #1
+        GETBIT
+        addne   gamma,gamma,#1
+        .endm
+
+@This initialization code can go into slow memory
+
+depack:
+        stmfd   sp!,{r4-r10,lr}
+        ldrb    temp,[src],#1
+        strb    temp,[dest],#1
+        ldr     mask,=0x01010101
+        b       aploop_nolwm
+
+@This inner-loop code should be placed into fast memory
+
+        @depack enters here
+aploop_nolwm:
+        mov     lwm,#0
+aploop:
+        GETBIT
+        bne     apbranch1
+        ldrb    temp,[src],#1
+        strb    temp,[dest],#1
+        b       aploop_nolwm
+apbranch1:
+        GETBIT
+        beq     apbranch2
+        GETBIT
+        beq     apbranch3
+        @get an offset
+        mov     gamma,#0
+        GETBIT
+        addne   gamma,gamma,#1
+        GETBITGAMMA
+        GETBITGAMMA
+        GETBITGAMMA
+        cmp     gamma,#0
+        ldrneb  gamma,[dest,-gamma]
+        strb    gamma,[dest],#1
+        b       aploop_nolwm
+apbranch3:
+        @use 7 bit offset, length = 2 or 3
+        @if a zero is encountered here, it's EOF
+        ldrb    gamma,[src],#1
+        movs    recentoff,gamma,lsr #1
+        beq     done
+        ldrcsb  temp,[dest,-recentoff]
+        strcsb  temp,[dest],#1
+        ldrb    temp,[dest,-recentoff]
+        strb    temp,[dest],#1
+        ldrb    temp,[dest,-recentoff]
+        strb    temp,[dest],#1
+        mov     lwm,#1
+        b       aploop
+apbranch2:
+        @use a gamma code * 256 for offset, another gamma code for length
+
+        bl      ap_getgamma
+        sub     gamma,gamma,#2
+        cmp     lwm,#0
+        bne     ap_is_lwm
+        mov     lwm,#1
+        cmp     gamma,#0
+        bne     ap_not_zero_gamma
+
+        @if gamma code is 2, use old recent offset, and a new gamma code for length
+        bl      ap_getgamma
+copyloop1:
+        ldrb    temp,[dest,-recentoff]
+        strb    temp,[dest],#1
+        subs    gamma,gamma,#1
+        bne     copyloop1
+        b       aploop
+
+ap_not_zero_gamma:
+        sub     gamma,gamma,#1
+ap_is_lwm:
+        ldrb    temp,[src],#1
+        add     recentoff,temp,gamma,lsl #8
+        bl      ap_getgamma
+        @gamma=length
+        cmp     recentoff,#32000
+        addge   gamma,gamma,#1
+        cmp     recentoff,#1280
+        addge   gamma,gamma,#1
+        cmp     recentoff,#128
+        addlt   gamma,gamma,#2
+copyloop2:
+        ldrb    temp,[dest,-recentoff]
+        strb    temp,[dest],#1
+        subs    gamma,gamma,#1
+        bne     copyloop2
+        b       aploop
+
+ap_getgamma:
+        mov     gamma,#1
+ap_getgammaloop:
+        GETBITGAMMA
+        GETBIT
+        bne     ap_getgammaloop
+        bx      lr
+
+done:
+        ldmfd   sp!,{r4-r10,lr}
+        bx      lr
+
+.unreq src
+.unreq dest
+.unreq byte
+.unreq mask
+.unreq gamma
+.unreq lwm
+.unreq recentoff
+.unreq temp
+
diff --git a/tools/apultra_src/asm/Z80/aplib_z80.asm b/tools/apultra_src/asm/Z80/aplib_z80.asm
new file mode 100644
index 0000000..6843a14
--- /dev/null
+++ b/tools/apultra_src/asm/Z80/aplib_z80.asm
@@ -0,0 +1,190 @@
+;Z80 Version by Dan Weiss
+;Call depack.
+;hl = source
+;de = dest
+
+ap_bits:   .db 0
+ap_byte:   .db 0
+lwm:       .db 0
+r0:        .dw 0
+
+ap_getbit:
+        push    bc
+        ld      bc,(ap_bits)
+        rrc     c
+        jr      nc,ap_getbit_continue
+        ld      b,(hl)
+        inc     hl
+ap_getbit_continue:
+        ld      a,c
+        and     b
+        ld      (ap_bits),bc
+        pop     bc
+        ret
+
+ap_getbitbc:    ;doubles BC and adds the read bit
+        sla     c
+        rl      b
+        call    ap_getbit
+        ret     z
+        inc     bc
+        ret
+
+ap_getgamma:
+        ld      bc,1
+ap_getgammaloop:
+        call    ap_getbitbc
+        call    ap_getbit
+        jr      nz,ap_getgammaloop
+        ret
+
+
+depack:
+        ;hl = source
+        ;de = dest
+        ldi
+        xor     a
+        ld      (lwm),a
+        inc     a
+        ld      (ap_bits),a
+
+aploop:
+        call    ap_getbit
+        jp      z, apbranch1
+        call    ap_getbit
+        jr      z, apbranch2
+        call    ap_getbit
+        jr      z, apbranch3
+        ;LWM = 0
+        xor     a
+        ld      (lwm),a
+        ;get an offset
+        ld      bc,0
+        call    ap_getbitbc
+        call    ap_getbitbc
+        call    ap_getbitbc
+        call    ap_getbitbc
+        ld      a,b
+        or      c
+        jr      nz,apbranch4
+        xor     a               ;write a 0
+        ld      (de),a
+        inc     de
+        jr      aploop
+apbranch4:
+        ex      de,hl           ;write a previous byte (1-15 away from dest)
+        push    hl
+        sbc     hl,bc
+        ld      a,(hl)
+        pop     hl
+        ld      (hl),a
+        inc     hl
+        ex      de,hl
+        jr      aploop
+apbranch3:
+        ;use 7 bit offset, length = 2 or 3
+        ;if a zero is encountered here, it's EOF
+        ld      c,(hl)
+        inc     hl
+        rr      c
+        ret     z
+        ld      b,2
+        jr      nc,ap_dont_inc_b
+        inc     b
+ap_dont_inc_b:
+        ;LWM = 1
+        ld      a,1
+        ld      (lwm),a
+
+        push    hl
+        ld      a,b
+        ld      b,0
+        ;R0 = c
+        ld      (r0),bc
+        ld      h,d
+        ld      l,e
+        or      a
+        sbc     hl,bc
+        ld      c,a
+        ldir
+        pop     hl
+        jr      aploop
+apbranch2:
+        ;use a gamma code * 256 for offset, another gamma code for length
+        call    ap_getgamma
+        dec     bc
+        dec     bc
+        ld      a,(lwm)
+        or      a
+        jr      nz,ap_not_lwm
+        ;bc = 2?
+        ld      a,b
+        or      c
+        jr      nz,ap_not_zero_gamma
+        ;if gamma code is 2, use old r0 offset, and a new gamma code for length
+        call    ap_getgamma
+        push    hl
+        ld      h,d
+        ld      l,e
+        push    bc
+        ld      bc,(r0)
+        sbc     hl,bc
+        pop     bc
+        ldir
+        pop     hl
+        jr      ap_finishup
+
+ap_not_zero_gamma:
+        dec     bc
+ap_not_lwm:
+        ;do I even need this code?
+        ;bc=bc*256+(hl), lazy 16bit way
+        ld      b,c
+        ld      c,(hl)
+        inc     hl
+        ld      (r0),bc
+        push    bc
+        call    ap_getgamma
+        ex      (sp),hl
+        ;bc = len, hl=offs
+        push    de
+        ex      de,hl
+        ;some comparison junk for some reason
+        ld      hl,31999
+        or      a
+        sbc     hl,de
+        jr      nc,skip1
+        inc     bc
+skip1:
+        ld      hl,1279
+        or      a
+        sbc     hl,de
+        jr      nc,skip2
+        inc     bc
+skip2:
+        ld      hl,127
+        or      a
+        sbc     hl,de
+        jr      c,skip3
+        inc     bc
+        inc     bc
+skip3:
+        ;bc = len, de = offs, hl=junk
+        pop     hl
+        push    hl
+        or      a
+        sbc     hl,de
+        pop     de
+        ;hl=dest-offs, bc=len, de = dest
+        ldir
+        pop     hl
+ap_finishup:
+        ld      a,1
+        ld      (lwm),a
+        jp      aploop
+
+apbranch1:
+        ldi
+        xor     a
+        ld      (lwm),a
+        jp      aploop
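
For readers comparing the three decompressors above, this is a minimal C sketch of the aPLib token scheme they all implement: the first byte is copied verbatim, then tag bits select literals, gamma-coded LZSS pairs, 7-bit-offset matches, and 4-bit-offset single bytes. It is distilled from the routines in this import rather than copied from any of them; the names (apl_depack_sketch, get_bit, get_gamma) are invented for the example, the bit-reader state is kept in globals for brevity, and there is no bounds checking.

#include <stddef.h>
#include <stdint.h>

/* Sketch only: mirrors the branch structure of the 6502/ARM/Z80 routines. */
static const uint8_t *g_src;
static uint8_t g_tag;                    /* current tag byte                 */
static uint8_t g_mask;                   /* 0 forces a reload, as in the asm */

static int get_bit(void)
{
    if (g_mask == 0) { g_tag = *g_src++; g_mask = 0x80; }
    int bit = (g_tag & g_mask) != 0;
    g_mask >>= 1;
    return bit;
}

static uint32_t get_gamma(void)          /* same code as ap_getgamma, >= 2 */
{
    uint32_t v = 1;
    do { v = (v << 1) + get_bit(); } while (get_bit());
    return v;
}

size_t apl_depack_sketch(const uint8_t *src, uint8_t *dst)
{
    uint8_t *out = dst;
    uint32_t offs = 0, lwm = 0;

    g_src = src; g_mask = 0;
    *out++ = *g_src++;                   /* first byte is always literal */

    for (;;) {
        if (!get_bit()) {                /* 0: literal byte              */
            *out++ = *g_src++;
            lwm = 0;
        } else if (!get_bit()) {         /* 10: gamma offset + length    */
            uint32_t hi = get_gamma(), len;
            if (lwm == 0 && hi == 2) {   /* reuse the previous offset    */
                len = get_gamma();
            } else {
                hi -= (lwm == 0) ? 3 : 2;
                offs = (hi << 8) + *g_src++;
                len = get_gamma();
                if (offs >= 32000) len++;
                if (offs >= 1280)  len++;
                if (offs < 128)    len += 2;
            }
            while (len--) { *out = out[-(ptrdiff_t)offs]; out++; }
            lwm = 1;
        } else if (!get_bit()) {         /* 110: 7-bit offset, len 2..3  */
            uint32_t c = *g_src++;
            if ((c >> 1) == 0) break;    /* offset 0 terminates stream   */
            offs = c >> 1;
            uint32_t len = 2 + (c & 1);
            while (len--) { *out = out[-(ptrdiff_t)offs]; out++; }
            lwm = 1;
        } else {                         /* 111: 4-bit offset, one byte  */
            uint32_t o = 0;
            for (int i = 0; i < 4; i++) o = (o << 1) + get_bit();
            *out = o ? out[-(ptrdiff_t)o] : 0;
            out++;
            lwm = 0;
        }
    }
    return (size_t)(out - dst);
}

The length adjustments for offsets >= 32000, >= 1280 and < 128 correspond to the cpy #$7D / cpy #$05 tests in the 6502 version, the cmp recentoff,#32000 chain in the ARM version, and the 31999/1279/127 comparisons in the Z80 version.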