From: RWilson@acorn.co.uk
Newsgroups: comp.sys.acorn
Subject: How to find the closest colour out of the RISC OS 256
Date: 30 Jan 92 12:20:23 GMT

; MACRO ColErr
;
; Calculates an error value in $error corresponding to the difference ; between the two colours $col1 and $col2. Requires two temporary ; registers. All registers must be different. This routine requires ; 16 instructions, three of which are multiples plus for instructions ; for the error loading calculations. To speed up the multiplies ; the error value for each component is made positive. ;
; The error calculation is controlled by the following ``loading'' values. ; The basic component error calculation, given components c1 and c2 from ; $col1 and $col2, is:-
;

;       e := ((c1-c2) ^ 2) * load

;
; Thus a higher ``load'' makes errors in the component more significant when ; the component value itself is small, however has little effect when the ; component value is large. Each loading must not exceed 65536/3 or the ; error calculation may overflow. If a loading exceeds 32768/3 the result may ; have the top bit set. The loadings are determined algorithmically by the ; following macros - this avoids having to do a real multiplication. The ; macros have the form:-
;

;       XWeight $err, $sqr

;
; and calculate:-
;

;       $err = $sqr * XLoad     (for BLoad)
;       $err += $sqr * XLoad    (for GLoad, RLoad)

;
; $sqr may be overwritten. They are called in the order BLoad, GLoad ; RLoad, and are only called if the corresponding loading value is not ; 1.
;

BLoad   EQU     1
        MACRO

$label BWeight $error, $sqr

        ASSERT  0
        MEND

GLoad   EQU     10
        MACRO

$label GWeight $error, $sqr

$label  ADD     $sqr, $sqr, $sqr, LSL #2
        ADD     $error, $error, $sqr, LSL #1
        MEND

RLoad   EQU     3
        MACRO

$label RWeight $error, $sqr

$label  ADD     $sqr, $sqr, $sqr, LSL #1
        ADD     $error, $error, $sqr
        MEND

        MACRO

$label ColErr $error, $col1, $col2, $temp1, $temp2

$label  MOV     $temp1, $col1, LSR #24
        SUBS    $temp2, $temp1, $col2, LSR #24
        RSBLT   $temp2, $temp2, #0

[ BLoad /= 1

        MUL     $temp1, $temp2, $temp2
        BWeight $error, $temp1

        MUL     $error, $temp2, $temp2

]

        MOV     $temp2, #255
        AND     $temp1, $temp2, $col1, LSR #16
        AND     $temp2, $temp2, $col2, LSR #16
        SUBS    $temp2, $temp1, $temp2
        RSBLT   $temp2, $temp2, #0

[ GLoad /= 1

        MUL     $temp1, $temp2, $temp2
        GWeight $error, $temp1

        MLA     $error, $temp2, $temp2, $error

]

        MOV     $temp2, #255
        AND     $temp1, $temp2, $col1, LSR #8
        AND     $temp2, $temp2, $col2, LSR #8
        SUBS    $temp2, $temp1, $temp2
        RSBLT   $temp2, $temp2, #0

[ RLoad /= 1

        MUL     $temp1, $temp2, $temp2
        RWeight $error, $temp1

        MLA     $error, $temp2, $temp2, $error

]

        MEND

;
; MACRO CompErr
;
; This macro also calculates an error value, however the second colour ; is specified as three separate r, g, b values. The registers containing ; these values can be the same, if desired. The registers should not be ; the same as any of the other registers. The calculation needs 16 ; instructions, including three multiplies. ;

        MACRO

$label CompErr $error, $col, $red, $green, $blue, $temp1, $temp2

$label SUBS $temp2, $blue, $col, LSR #24

        RSBLT   $temp2, $temp2, #0

[ BLoad /= 1

        MUL     $temp1, $temp2, $temp2
        BWeight $error, $temp1

        MUL     $error, $temp2, $temp2

]

        AND     $temp1, $col, #&FF0000        ; green component, still shifted
        SUBS    $temp2, $green, $temp1, LSR #16
        RSBLT   $temp2, $temp2, #0             ; |green error|

[ GLoad /= 1

        MUL     $temp1, $temp2, $temp2
        GWeight $error, $temp1

        MLA     $error, $temp2, $temp2, $error

]

        AND     $temp1, $col, #&FF00          ; red component, still shifted
        SUBS    $temp2, $red, $temp1, LSR #8
        RSBLT   $temp2, $temp2, #0             ; |red error|

[ RLoad /= 1

        MUL     $temp1, $temp2, $temp2
        RWeight $error, $temp1

        MLA     $error, $temp2, $temp2, $error

]

        MEND

;
; MACRO FindCol
;
; This macro finds the closest colour to the given r, g, b triple from ; an array of (word sized) RGB values (encoded BBGGRR00). The macro ; preserves the $red, $green and $blue values, exits with $error set ; to that of the found colour and $index set to the index of the entry. Again, ; all arguments must be different registers. ;

        MACRO

$label FindCol $list, $listend, $red, $green, $blue, $error, $index, $ptr, $col, $temp1, $temp2, $temp3

$label  MOV     $error, #&FFFFFFFF           ; maximum error
        SUB     $ptr, $list, #4              ; points to entry to last entry tried
0:      LDR     $col, [$ptr, #4]!
        CompErr $temp3, $col, $red, $green, $blue, $temp1, $temp2
        CMP     $temp3, $error
        MOVLS   $error, $temp3
        SUBLS   $index, $list, $ptr          ; index * 4
        CMP     $ptr, $listend
        BLO     0b
        MOV     $index, $index, LSR #2
        MEND

;
; MACRO Find256
;
; This macro finds the best matching colour for the *standard* ARM 256 entry palette - the ; one with the R/G/B/T (tint) bits. The algorithm returns a palette value encoded in the ; standard way (BGGRBRTT) in $col and the error in $error. ; All arguments must be different registers. The loop is about 44 instructions, including ; the normal three multiplies. The code goes round it four times, there is a further 12 ; instruction overhead.
;
; The registers must all be different except for $red, $green and $blue, which can be ; the same if desired.
;
; The ARM palette entries are assumed to expand a 4 bit component to an 8 bit component ; using c<<4|c - this has been determined experimentally to give good results. ;

        MACRO

$label Find256 $red, $green, $blue, $error, $col, $tint, $temp1, $temp2, $temp3, $pixel

$label  MOV     $error, #&FFFFFFFF
        MOV     $tint, #&30:SHL:23                ; tint bits unexpanded

0: RSB $temp1, $tint, #&20:SHL:23 ; overflow is not possible here
SUB $temp2, $blue, $blue, LSR #4 ; effectively multiplication by 16/17
ADDS $temp1, $temp1, $temp2, LSL #23
;
; At this point the top bits of $temp1 hold the best blue bit values given
; the current $tint tint bits, however the desired value may be >11tt or <00tt,
; in either case the top bit (bit 31) of $temp1 will be set, hence the N flag
; will be set in the PSR. We must distinguish overflow (>11tt) from a simple
; negative result (<00tt) and truncate both to the appropriate end of the
; scale. We have calculated (blue-tint+&22)<<23. The overflow (V) flag will
; ONLY be set for >11tt; the other possible results (in the range &FF<<23 to
; -&17<<23 are representable without overflow), so:-
;
MOVVSS $temp1, #&7F000000 ; clears the N flag!
MOVMI $temp1, #0
;
; Now extract the blue bits and reconstruct the real (expanded) blue value.
;
AND $temp1, $temp1, #&60000000 ; two blue bits
ADD $temp1, $temp1, $tint ; plus tint
ADD $pixel, $temp1, $temp1, LSR #4 ; expand component bits - 8 bit blue value
;
; Calculate the error as above.
;
SUBS $temp2, $blue, $pixel, LSR #23
RSBLT $temp2, $temp2, #0 ; speeds up multiplication
[ BLoad /= 1
MUL $temp1, $temp2, $temp2
BWeight $temp3, $temp1
|
MUL $temp3, $temp2, $temp2
]
;
; Repeat this for the green component, accumulating the error
;
RSB $temp1, $tint, #&20:SHL:23
SUB $temp2, $green, $green, LSR #4
ADDS $temp1, $temp1, $temp2, LSL #23
MOVVSS $temp1, #&7F000000
MOVMI $temp1, #0
;
AND $temp1, $temp1, #&60000000 ; two green bits
ADD $temp1, $tint, $temp1 ; 4 bit green value
ADD $temp1, $temp1, $temp1, LSR #4 ; expand component bits
ORR $pixel, $pixel, $temp1, LSR #8 ; Accumulate bits in $pixel
;
SUBS $temp2, $green, $temp1, LSR #23
RSBLT $temp2, $temp2, #0
[ GLoad /= 1
MUL $temp1, $temp2, $temp2
GWeight $temp3, $temp1
|
MLA $temp3, $temp2, $temp2, $temp3
]
;
; And the red component:-
;
RSB $temp1, $tint, #&20:SHL:23
SUB $temp2, $red, $red, LSR #4
ADDS $temp1, $temp1, $temp2, LSL #23
MOVVSS $temp1, #&7F000000
MOVMI $temp1, #0
;
AND $temp1, $temp1, #&60000000 ; two red bits
ADD $temp1, $tint, $temp1 ; 4 bit red value
ADD $temp1, $temp1, $temp1, LSR #4 ; expand component bits
ORR $pixel, $pixel, $temp1, LSR #16 ; Accumulate bits in $pixel
;
SUBS $temp2, $red, $temp1, LSR #23
RSBLT $temp2, $temp2, #0
[ RLoad /= 1
MUL $temp1, $temp2, $temp2
RWeight $temp3, $temp1
|
MLA $temp3, $temp2, $temp2, $temp3
]
;
; $temp3 contains the error for the ARM value in $pixel (actually this value
; is shifted right by 1 bit because of the LSL 23 above). Check the error
; and see if this is a better pixel.
;
CMP $temp3, $error
MOVLS $error, $temp3 ; LS, so selects lower tint in preference
MOVLS $col, $pixel ; $col holds best match
;
; Try the next tint
;
SUBS $tint, $tint, #&10:SHL:23
BGE 0b
;
; $error is the error, and is directly comparable with the $error
; value from the other macros. $col to the ARM palette entry using
; the appropriate bit manipulations. The value is currently:-
;
; 0BBTT011 1GGTT011 1RRTT011 10000000
;
; We need to convert this to the form:-
;
; 76543210
; BGGRBRTT
;
AND $temp1, $col, #&40000000 ; B (needs >> 23)
MOV $temp2, $temp1, LSR #23
AND $temp1, $col, #&600000 ; GG (needs >> 16)
ORR $temp2, $temp2, $temp1, LSR #16
AND $temp1, $col, #&4000 ; R (needs >> 10)
ORR $temp2, $temp2, $temp1, LSR #10
AND $temp1, $col, #&20000000 ; B (needs >> 26)
ORR $temp2, $temp2, $temp1, LSR #26
AND $temp1, $col, #&3800 ; RTT (needs >> 11)
ORR $col, $temp2, $temp1, LSR #11

        MEND