Well... the code's more readable at least!
I took things a step further than originally planned, from simply 'removal of bad assumptions' to 'make the code more portable and readable' as well.
This has resulted in me learning something about C that I always took for granted under NASM and other Assemblers. There's no way to define a macro to spit out an unknown-until-invocation number of lines of text.
I.e., as far as I can tell, there's no way to write a macro that collapses the insanely convoluted piles of 'case' statements into 'if THESE bits have THIS value' instead of spelling out 'if the value is X' anywhere from 1 to 256 times each. =-.-=
If anyone out there knows a trick to do this in stock C, or at least in GNU C, let me know. It'll make the remaining code much easier to understand.
Now all I have to do is merge the 2x, 3x, and 4x include files into one file, though I'm also investigating making a series of lookup tables, one for each pixel location. Right now the code-size is a rather insane 100+k in the 4x case, but MUCH of it is redundant code. If I can use jump-tables (NOT a call-table) to speed that code up, things would be much smaller. And if I can get all the code to fit in 16-odd-k along with the data, suddenly things get much nicer for the CPU. =^.^=
And for anyone curious about the new code compared to the old code:
| #define Whole(x) | (x) /* Always safe */ |
| #define Half(x) | (((x)&_Mask_)>>1) |
| #define Quarter(x) | (((x)&(_Mask_&(_Mask_<<1)))>>2) |
| #define Eighth(x) | (((x)&(_Mask_&(_Mask_<<1)&(_Mask_<<2)))>>3) |
| #define Sixteenth(x) | (((x)&(_Mask_&(_Mask_<<1)&(_Mask_<<2)&(_Mask_<<3)))>>4) |
| /* ((c1*1.0000)) */ | |
| #define Interp0(c1) | (Whole(c[c1])) |
| /* ((c1*0.7500)+(c2*0.2500)) */ | |
| #define Interp1(c1,c2) | (Whole(c[c1])-Quarter(c[c1])+Quarter(c[c2])) |
| /* ((c1*0.5000)+(c2*0.2500)+(c3*0.2500)) */ | |
| #define Interp2(c1,c2,c3) | (Half(c[c1])+Quarter(c[c2])+Quarter(c[c3])) |
| /* ((c1*0.8750)+(c2*0.1250)) */ | |
| #define Interp3(c1,c2) | (Whole(c[c1])-Eighth(c[c1])+Eighth(c[c2])) |
Compare to the original code:
/*
 * Interp1 - blend two packed pixels as (c1 * 0.75) + (c2 * 0.25) and store
 * the result at pc.
 *
 * pc      destination; sizeof(unsigned int) bytes are written in native
 *         byte order. No alignment is required.
 * c1, c2  packed pixels with three 8-bit channels in bits 0-23 (the same
 *         layout the masks in Interp3 use). Bits 24-31 are ignored and the
 *         stored value has them cleared.
 *
 * Each channel is weighted separately so that low bits of one channel can
 * never be shifted into the top of the channel below it, which is what
 * happens when the whole packed word is multiplied and shifted at once.
 *
 * Declared static inline: a plain C99 `inline` definition provides no
 * external definition, so any call the compiler chose not to inline would
 * fail to link.
 */
static inline void Interp1(unsigned char *pc, int c1, int c2)
{
    const unsigned int a = (unsigned int)c1;
    const unsigned int b = (unsigned int)c2;

    /* Middle channel (bits 8-15): the weighted sum occupies bits 8-17;
     * keep only the 8 bits that survive the final >> 2. */
    const unsigned int mid =
        ((a & 0x00FF00u) * 3u + (b & 0x00FF00u)) & 0x0003FC00u;

    /* Outer channels (bits 0-7 and 16-23), processed together because the
     * gap between them absorbs the carry of the low one. */
    const unsigned int outer =
        ((a & 0xFF00FFu) * 3u + (b & 0xFF00FFu)) & 0x03FC03FCu;

    const unsigned int pixel = (mid + outer) >> 2;

    /* Copy byte-wise instead of storing through an (int *) cast: pc may be
     * unaligned and may point into a plain unsigned char array. */
    const unsigned char *src = (const unsigned char *)&pixel;
    unsigned int i;

    for (i = 0; i < sizeof pixel; i++) {
        pc[i] = src[i];
    }
}
/*
 * Interp2 - blend three packed pixels as
 * (c1 * 0.50) + (c2 * 0.25) + (c3 * 0.25) and store the result at pc.
 *
 * pc          destination; sizeof(unsigned int) bytes are written in native
 *             byte order. No alignment is required.
 * c1, c2, c3  packed pixels with three 8-bit channels in bits 0-23 (the
 *             same layout the masks in Interp3 use). Bits 24-31 are ignored
 *             and the stored value has them cleared.
 *
 * Each channel is weighted separately so that low bits of one channel can
 * never be shifted into the top of the channel below it, which is what
 * happens when the whole packed word is summed and shifted at once.
 *
 * Declared static inline: a plain C99 `inline` definition provides no
 * external definition, so any call the compiler chose not to inline would
 * fail to link.
 */
static inline void Interp2(unsigned char *pc, int c1, int c2, int c3)
{
    const unsigned int a = (unsigned int)c1;
    const unsigned int b = (unsigned int)c2;
    const unsigned int d = (unsigned int)c3;

    /* Middle channel (bits 8-15): the weighted sum occupies bits 8-17;
     * keep only the 8 bits that survive the final >> 2. */
    const unsigned int mid =
        ((a & 0x00FF00u) * 2u + (b & 0x00FF00u) + (d & 0x00FF00u))
        & 0x0003FC00u;

    /* Outer channels (bits 0-7 and 16-23), processed together because the
     * gap between them absorbs the carry of the low one. */
    const unsigned int outer =
        ((a & 0xFF00FFu) * 2u + (b & 0xFF00FFu) + (d & 0xFF00FFu))
        & 0x03FC03FCu;

    const unsigned int pixel = (mid + outer) >> 2;

    /* Copy byte-wise instead of storing through an (int *) cast: pc may be
     * unaligned and may point into a plain unsigned char array. */
    const unsigned char *src = (const unsigned char *)&pixel;
    unsigned int i;

    for (i = 0; i < sizeof pixel; i++) {
        pc[i] = src[i];
    }
}
/*
 * Interp3 - blend two packed pixels as (c1 * 0.875) + (c2 * 0.125) and
 * store the result at pc.
 *
 * pc      destination; sizeof(unsigned int) bytes are written in native
 *         byte order. No alignment is required.
 * c1, c2  packed pixels with three 8-bit channels in bits 0-23. Bits 24-31
 *         are ignored and the stored value has them cleared.
 *
 * The channels are weighted separately (middle channel alone, the two outer
 * channels together) so that the final shift cannot move bits from one
 * channel into another.
 *
 * Declared static inline: a plain C99 `inline` definition provides no
 * external definition, so any call the compiler chose not to inline would
 * fail to link.
 */
static inline void Interp3(unsigned char *pc, int c1, int c2)
{
    const unsigned int a = (unsigned int)c1;
    const unsigned int b = (unsigned int)c2;

    /* Middle channel (bits 8-15): the weighted sum occupies bits 8-18;
     * keep only the 8 bits that survive the final >> 3. */
    const unsigned int mid =
        ((a & 0x00FF00u) * 7u + (b & 0x00FF00u)) & 0x0007F800u;

    /* Outer channels (bits 0-7 and 16-23), processed together because the
     * gap between them absorbs the carry of the low one. */
    const unsigned int outer =
        ((a & 0xFF00FFu) * 7u + (b & 0xFF00FFu)) & 0x07F807F8u;

    const unsigned int pixel = (mid + outer) >> 3;

    /* Copy byte-wise instead of storing through an (int *) cast: pc may be
     * unaligned and may point into a plain unsigned char array. */
    const unsigned char *src = (const unsigned char *)&pixel;
    unsigned int i;

    for (i = 0; i < sizeof pixel; i++) {
        pc[i] = src[i];
    }
}