diff --git a/bin/vasmarm_std_win32.exe b/bin/vasmarm_std_win32.exe new file mode 100644 index 0000000..678a256 Binary files /dev/null and b/bin/vasmarm_std_win32.exe differ diff --git a/doom-fire.asm b/doom-fire.asm index 155f269..68dfa81 100644 --- a/doom-fire.asm +++ b/doom-fire.asm @@ -479,31 +479,22 @@ plot_horizontal_line: add r0, r2, r7 subs r0, r0, #1 ; dest_x += (rnd & 3)-1 movlt r0, #0 ; or MOD ScreenWidth - - ; update colour with some randomness -; and r7, r7, #1 ; rnd & 1 subs r4, r4, r7, lsr #1 ; colour -= rnd & 1 .endm .macro PLOT_PIXEL_BIT - add r6, r12, r0, lsr #1 ; r6 = dest_start + x DIV 2 - - ldrb r11, [r6] ; load screen byte - tst r0, #1 ; odd or even pixel? - andeq r11, r11, #0xF0 ; mask out left hand pixel - orreq r11, r11, r4 ; mask in colour as left hand pixel - andne r11, r11, #0x0F ; mask out right hand pixel - orrne r11, r11, r4, lsl #4 ; mask in colour as right hand pixel - strb r11, [r6] ; store screen byte + orr r11, r4, r4, lsl #4 ; colour in both nibbles of the byte + strb r11, [r12, r0] ; dest_start + x + strb r11, [r6, r0] ; dest_start + Screen_Stride + x (row below; r6 = r12 + Screen_Stride) .endm ; DOOM FIRE! ; R0 = x ; R1 = y ; R2 = x -; R3 = y +; R3 = end of screen address (for loop termination) ; R4 = pixel colour -; R5 = screen byte +; R5 = screen word ; R6 = ptr ; R7 = rnd temp ; R8 = seed @@ -515,9 +506,9 @@ do_fire: str lr, [sp, #-4]! 
ldr r8, rnd_seed ; seed - mov r9, #1 ; bit + ldr r9, bits_mask ; bit - mov r3, #Screen_Height - 63 + mov r3, #Screen_Height - 123 ; R10 = ptr to start of source line add r10, r12, r3, lsl #7 ; r10 = screen_addr + y * 128 @@ -529,7 +520,8 @@ do_fire: add r3, r3, r2, lsl #5 ; r10 += y * 32 = y * 160 ; R12 = ptr to start of dest line - sub r12, r10, #Screen_Stride + sub r12, r10, #Screen_Stride*2 + add r6, r12, #Screen_Stride .1: mov r2, #0 @@ -539,7 +531,7 @@ do_fire: RND ; source is contiguous - ; read source word = 8x pixels + ; read source word = 4x pixels ldr r5, [r10], #4 ; Byte 0 Left pixel @@ -551,17 +543,6 @@ do_fire: PLOT_PIXEL_BIT add r2, r2, #1 - ; Byte 0 Right pixel - mov r4, r5, lsr #4 - ands r4, r4, #0x0F - moveq r0, r2 - beq .4 - mov r7, r8, lsr #4 - DO_RANDOM_BIT - .4: - PLOT_PIXEL_BIT - add r2, r2, #1 - ; Byte 1 Left pixel mov r4, r5, lsr #8 ands r4, r4, #0x0F @@ -573,17 +554,6 @@ do_fire: PLOT_PIXEL_BIT add r2, r2, #1 - ; Byte 1 Right pixel - mov r4, r5, lsr #12 - ands r4, r4, #0x0F - moveq r0, r2 - beq .6 - mov r7, r8, lsr #12 - DO_RANDOM_BIT - .6: - PLOT_PIXEL_BIT - add r2, r2, #1 - ; Byte 2 Left pixel mov r4, r5, lsr #16 ands r4, r4, #0x0F @@ -595,17 +565,6 @@ do_fire: PLOT_PIXEL_BIT add r2, r2, #1 - ; Byte 2 Right pixel - mov r4, r5, lsr #20 - ands r4, r4, #0x0F - moveq r0, r2 - beq .8 - mov r7, r8, lsr #20 - DO_RANDOM_BIT - .8: - PLOT_PIXEL_BIT - add r2, r2, #1 - ; Byte 3 Left pixel mov r4, r5, lsr #24 ands r4, r4, #0x0F @@ -617,21 +576,13 @@ do_fire: PLOT_PIXEL_BIT add r2, r2, #1 - ; Byte 2 Right pixel - movs r4, r5, lsr #28 - moveq r0, r2 - beq .10 - mov r7, r8, lsr #28 - DO_RANDOM_BIT - .10: - PLOT_PIXEL_BIT - add r2, r2, #1 - - cmp r2, #Screen_Width + cmp r2, #Screen_Stride blt .2 ; Next line - add r12, r12, #Screen_Stride + add r10, r10, #Screen_Stride + add r6, r6, #2*Screen_Stride + add r12, r12, #2*Screen_Stride cmp r12, r3 blt .1 @@ -642,6 +593,9 @@ do_fire: rnd_seed: .long 0x87654321 +bits_mask: + .long 0x11111111 + rnd: ; enter with 
seed in R0 (32 bits), R1 (1 bit in least significant bit) ; R2 is used as a temporary register. diff --git a/doom-fire.txt b/doom-fire.txt new file mode 100644 index 0000000..c03029a --- /dev/null +++ b/doom-fire.txt @@ -0,0 +1,61 @@ +DOOM Fire +~~~ + + function spreadFire(src) { + var rand = Math.round(Math.random() * 3.0) & 3; + var dst = src - rand + 1; + firePixels[dst - FIRE_WIDTH ] = firePixels[src] - (rand & 1); + } + + function doFire() { + for(x=0 ; x < FIRE_WIDTH; x++) { + for (y = 1; y < FIRE_HEIGHT; y++) { + spreadFire(y * FIRE_WIDTH + x); + } + } + } + +If word is 8x pixels: screen word = 0x87654321 +Then could propagate heat in one go +word -= rand & 0x11111111 +But need to clamp to zero. + +(word & 0x88888888) >> 3 | (word & 0x44444444) >> 2 | (word & 0x22222222) >> 1 | (word & 0x11111111) +Would give us a rand_mask with zeros where pixels were 0. + +bits_mask = 0x11111111 + +and temp, screen_word, bits_mask, lsl #3 ; 0x88888888 +mov rand_mask, temp, lsr #3 +and temp, screen_word, bits_mask, lsl #2 ; 0x44444444 +orr rand_mask, rand_mask, temp, lsr #2 +and temp, screen_word, bits_mask, lsl #1 ; 0x22222222 +orr rand_mask, rand_mask, temp, lsr #1 +and temp, screen_word, bits_mask +orr rand_mask, rand_mask, temp + +and temp, rand, rand_mask ; rand & rand_mask +sub screen_word, screen_word, temp ; screen_word -= rand & rand_mask + +Seems like this would be faster. +In terms of writing pixels - can only move -2..+1 from current X (dst = src - rand + 1) so one word +worth of destination can only affect +1/-1 words either side. +Keep a running buffer of 3 words?! 
+ + ; and temp, screen_word, bits_mask, lsl #3 ; 0x88888888 + and r11, r5, r9, lsl #3 + ; mov rand_mask, temp, lsr #3 + mov r7, r11, lsr #3 + + and r11, r5, r9, lsl #2 ; 0x44444444 + orr r7, r7, r11, lsr #2 + and r11, r5, r9, lsl #1 ; 0x22222222 + orr r7, r7, r11, lsr #1 + and r11, r5, r9 ; 0x11111111 + orr r7, r7, r11 + + and r11, r8, r7 ; rand & rand_mask + sub r5, r5, r11 ; screen_word -= rand & rand_mask + +Reduce resolution to 160x64... +Would still only give about 15 cycles per pixel! diff --git a/make.bat b/make.bat index 687a106..45bbaec 100644 --- a/make.bat +++ b/make.bat @@ -1,3 +1,3 @@ @echo off -..\..\vasm\vasmarm_std_win32.exe -L compile.txt -m250 -Fbin -opt-adr -o build\doomfire.bin doom-fire.asm +bin\vasmarm_std_win32.exe -L compile.txt -m250 -Fbin -opt-adr -o build\doomfire.bin doom-fire.asm if %ERRORLEVEL%==0 copy build\doomfire.bin "..\..\Arculator_V2.0_Windows\hostfs\doomfire,ff8"