Skip to content

Commit

Permalink
Half screen resolution.
Browse files Browse the repository at this point in the history
Much faster.
  • Loading branch information
kieranhj committed Sep 17, 2020
1 parent 98ffc3f commit ede8881
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 64 deletions.
Binary file added bin/vasmarm_std_win32.exe
Binary file not shown.
80 changes: 17 additions & 63 deletions doom-fire.asm
Original file line number Diff line number Diff line change
Expand Up @@ -479,31 +479,22 @@ plot_horizontal_line:
add r0, r2, r7
subs r0, r0, #1 ; dest_x += (rnd & 3)-1
movlt r0, #0 ; or MOD ScreenWidth

; update colour with some randomness
; and r7, r7, #1 ; rnd & 1
subs r4, r4, r7, lsr #1 ; colour -= rnd & 1
.endm

; PLOT_PIXEL_BIT: write the 4-bit colour in r4 to the destination line for
; pixel column r0. Uses r11 as scratch; r12 is the start of the destination line.
; NOTE(review): this listing is a commit diff shown without +/- markers, so the
; nibble read-modify-write sequence (ldrb..strb [r6]) and the whole-byte stores
; after it are presumably the old and new bodies of the macro respectively -
; confirm against the repository before assembling this text as-is.
.macro PLOT_PIXEL_BIT
add r6, r12, r0, lsr #1 ; r6 = dest_start + x DIV 2

ldrb r11, [r6] ; load screen byte
tst r0, #1 ; odd or even pixel?
andeq r11, r11, #0xF0 ; even x: clear the low nibble (left hand pixel)
orreq r11, r11, r4 ; even x: put colour in the low nibble
andne r11, r11, #0x0F ; odd x: clear the high nibble (right hand pixel)
orrne r11, r11, r4, lsl #4 ; odd x: put colour in the high nibble
strb r11, [r6] ; store screen byte
orr r11, r4, r4, lsl #4 ; r11 = r4 | (r4 << 4): colour in both nibbles of the byte
strb r11, [r12, r0] ; store whole byte at dest_start + x
strb r11, [r6, r0] ; store the same byte at r6 + x (do_fire sets r6 = r12 + Screen_Stride)
.endm

; DOOM FIRE!
; R0 = x
; R1 = y
; R2 = x
; R3 = y
; R3 = end of screen address (for loop termination)
; R4 = pixel colour
; R5 = screen byte
; R5 = screen word
; R6 = ptr
; R7 = rnd temp
; R8 = seed
Expand All @@ -515,9 +506,9 @@ do_fire:
str lr, [sp, #-4]!

ldr r8, rnd_seed ; seed
mov r9, #1 ; bit
ldr r9, bits_mask ; bit

mov r3, #Screen_Height - 63
mov r3, #Screen_Height - 123

; R10 = ptr to start of source line
add r10, r12, r3, lsl #7 ; r10 = screen_addr + y * 128
Expand All @@ -529,7 +520,8 @@ do_fire:
add r3, r3, r2, lsl #5 ; r10 += y * 32 = y * 160

; R12 = ptr to start of dest line
sub r12, r10, #Screen_Stride
sub r12, r10, #Screen_Stride*2
add r6, r12, #Screen_Stride

.1:
mov r2, #0
Expand All @@ -539,7 +531,7 @@ do_fire:
RND

; source is contiguous
; read source word = 8x pixels
; read source word = 4x pixels
ldr r5, [r10], #4

; Byte 0 Left pixel
Expand All @@ -551,17 +543,6 @@ do_fire:
PLOT_PIXEL_BIT
add r2, r2, #1

; Byte 0 Right pixel
mov r4, r5, lsr #4
ands r4, r4, #0x0F
moveq r0, r2
beq .4
mov r7, r8, lsr #4
DO_RANDOM_BIT
.4:
PLOT_PIXEL_BIT
add r2, r2, #1

; Byte 1 Left pixel
mov r4, r5, lsr #8
ands r4, r4, #0x0F
Expand All @@ -573,17 +554,6 @@ do_fire:
PLOT_PIXEL_BIT
add r2, r2, #1

; Byte 1 Right pixel
mov r4, r5, lsr #12
ands r4, r4, #0x0F
moveq r0, r2
beq .6
mov r7, r8, lsr #12
DO_RANDOM_BIT
.6:
PLOT_PIXEL_BIT
add r2, r2, #1

; Byte 2 Left pixel
mov r4, r5, lsr #16
ands r4, r4, #0x0F
Expand All @@ -595,17 +565,6 @@ do_fire:
PLOT_PIXEL_BIT
add r2, r2, #1

; Byte 2 Right pixel
mov r4, r5, lsr #20
ands r4, r4, #0x0F
moveq r0, r2
beq .8
mov r7, r8, lsr #20
DO_RANDOM_BIT
.8:
PLOT_PIXEL_BIT
add r2, r2, #1

; Byte 3 Left pixel
mov r4, r5, lsr #24
ands r4, r4, #0x0F
Expand All @@ -617,21 +576,13 @@ do_fire:
PLOT_PIXEL_BIT
add r2, r2, #1

; Byte 3 Right pixel
movs r4, r5, lsr #28
moveq r0, r2
beq .10
mov r7, r8, lsr #28
DO_RANDOM_BIT
.10:
PLOT_PIXEL_BIT
add r2, r2, #1

cmp r2, #Screen_Width
cmp r2, #Screen_Stride
blt .2

; Next line
add r12, r12, #Screen_Stride
add r10, r10, #Screen_Stride
add r6, r6, #2*Screen_Stride
add r12, r12, #2*Screen_Stride
cmp r12, r3
blt .1

Expand All @@ -642,6 +593,9 @@ do_fire:
; Seed word for the random number generator; do_fire loads it into r8.
rnd_seed:
.long 0x87654321

; Mask with the least-significant bit of every 4-bit nibble set.
; do_fire loads this word into r9.
bits_mask:
.long 0x11111111

rnd:
; enter with seed in R0 (32 bits), R1 (1 bit in least significant bit)
; R2 is used as a temporary register.
Expand Down
61 changes: 61 additions & 0 deletions doom-fire.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
DOOM Fire
~~~

/**
 * Propagate the heat of one fire cell into the row above it.
 *
 * Reads firePixels[src] and writes one cell in the previous row
 * (index - FIRE_WIDTH). A non-zero cell is shifted horizontally by
 * -2..+1 cells (dst = src - rand + 1, rand in 0..3) and cooled by
 * (rand & 1). A cell that is already 0 is copied straight up as 0,
 * so heat values can never go negative and cold cells do not drift.
 *
 * @param {number} src index of the source cell in firePixels
 */
function spreadFire(src) {
    var pixel = firePixels[src];
    if (pixel === 0) {
        // Nothing left to burn: keep the cell above cold, no randomness.
        firePixels[src - FIRE_WIDTH] = 0;
        return;
    }
    var rand = Math.round(Math.random() * 3.0) & 3;
    var dst = src - rand + 1;
    firePixels[dst - FIRE_WIDTH] = pixel - (rand & 1);
}

/**
 * Run one step of the fire simulation.
 *
 * Visits every cell column by column, skipping row 0 (it has no row
 * above it to spread into), and lets each cell spread its heat upwards
 * via spreadFire().
 */
function doFire() {
    // Declare the counters locally so they do not leak as implicit globals
    // (and so the function also works in strict mode).
    for (var x = 0; x < FIRE_WIDTH; x++) {
        for (var y = 1; y < FIRE_HEIGHT; y++) {
            spreadFire(y * FIRE_WIDTH + x);
        }
    }
}

If word is 8x pixels: screen word = 0x87654321
Then could propagate heat in one go
word -= rand & 0x11111111
But need to clamp to zero.

((word & 0x88888888) >> 3) | ((word & 0x44444444) >> 2) | ((word & 0x22222222) >> 1) | (word & 0x11111111)
Would give us a rand_mask with a 1 in the low bit of every non-zero pixel and zeros where pixels were 0.

bits_mask = 0x11111111

and temp, screen_word, bits_mask, lsl #3 ; 0x88888888
mov rand_mask, temp, lsr #3
and temp, screen_word, bits_mask, lsl #2 ; 0x44444444
orr rand_mask, rand_mask, temp, lsr #2
and temp, screen_word, bits_mask, lsl #1 ; 0x22222222
orr rand_mask, rand_mask, temp, lsr #1
and temp, screen_word, bits_mask
orr rand_mask, rand_mask, temp

and temp, rand, rand_mask ; rand & rand_mask
sub screen_word, screen_word, temp ; screen_word -= rand & rand_mask

Seems like this would be faster.
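In terms of writing pixels - each pixel can only move a short distance from its current X
(dst = src - rand + 1, i.e. between -2 and +1), so one word worth of source can only affect
the destination word at the same position and the words +1/-1 either side.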
Keep a running buffer of 3 words?!

; and temp, screen_word, bits_mask, lsl #3 ; 0x88888888
and r11, r5, r9, lsl #3
; mov rand_mask, temp, lsr #3
mov r7, r11, lsr #3

and r11, r5, r9, lsl #2 ; 0x44444444
orr r7, r7, r11, lsr #2
and r11, r5, r9, lsl #1 ; 0x22222222
orr r7, r7, r11, lsr #1
and r11, r5, r9 ; 0x11111111
orr r7, r7, r11

and r11, r8, r7 ; rand & rand_mask
sub r5, r5, r11 ; screen_word -= rand & rand_mask

Reduce resolution to 160x64...
Would still only give about 15 cycles per pixel!
2 changes: 1 addition & 1 deletion make.bat
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
@echo off
rem Assemble doom-fire.asm with vasm into build\doomfire.bin and, if the
rem assembler succeeded, copy the result into the Arculator hostfs folder
rem as "doomfire,ff8".
rem NOTE(review): two assembler invocations appear below because this listing
rem is a commit diff shown without +/- markers; presumably only the bin\ copy
rem of vasm is current - confirm against the repository.
..\..\vasm\vasmarm_std_win32.exe -L compile.txt -m250 -Fbin -opt-adr -o build\doomfire.bin doom-fire.asm
bin\vasmarm_std_win32.exe -L compile.txt -m250 -Fbin -opt-adr -o build\doomfire.bin doom-fire.asm
if %ERRORLEVEL%==0 copy build\doomfire.bin "..\..\Arculator_V2.0_Windows\hostfs\doomfire,ff8"

0 comments on commit ede8881

Please sign in to comment.