Reports

Not sure if this is the most efficient method, but I have a version that uses a fairly small (33 element) lookup table and seems to work pretty well. Haven't benchmarked it but it uses no loops, branching or divisions. It does use a count-leading-zeros function whose efficiency I can't comment on.

typedef int fixed_point;
const int FRACTIONAL_BITS = 16;
const fixed_point ONE = 1 << FRACTIONAL_BITS;

vector<fixed_point> invSqrtLookupTableFixed;

// Initialize the lookup table
void initializeInvSqrtLookupTable() {
    if (invSqrtLookupTableFixed.size()==0) {
        invSqrtLookupTableFixed.resize(33);
        for (int i=0; i<=32; i++) {
            unsigned long long A = 1;  // basically an unsigned very big fixed point. we use 64 bits because  there will be overrun at i=32
            A = 1<<i;  // our positive fixed-point value which corresponds to the i'th element of the table. 
            double fixedPointValueAsFloat = ((double)(A))/((double)ONE);  // convert fixed-point to double
            double valueForTable = 1.f/sqrt(fixedPointValueAsFloat);  // get the inverse square root of our fixed_point. We could make other types of conversion tables by changing the formula
            invSqrtLookupTableFixed[i] = valueForTable*ONE;  // convert float value to fixed and store it in the table.
        }
    }
}

// Float inverse square root using interpolated lookup table
fixed_point floatInvSqrtLookup(fixed_point vectorLengthSquared) {
    // this uses a lookup table to get the 1/sqrt(lengthSquared) which is the value required to normalize a vector. Lenghtsquared is used because it is proportional to the magnitude of the input vector but doesn't require a sqrt() which the actual magnitude would require
    // with a different lookup table this could also return just the sqrt instead of the inverse square root

    //fixed_point fixedVal = vectorLengthSquared*ONE;
    unsigned long leadingZeros;
    _BitScanReverse(&leadingZeros, vectorLengthSquared); //Find the most significant bit.
    unsigned long long bitsA, bitsB;
    bitsA = invSqrtLookupTableFixed[leadingZeros];
    bitsB = invSqrtLookupTableFixed[leadingZeros+1];

    unsigned int giganticized = vectorLengthSquared<<(31-leadingZeros);  // shift the bits so our lengthSquared is now between  2.1billion and 4.2billion which is actually the fraction. 4.2b is where the hi table value is and 2.1b is where the low table value is
    unsigned int interp_percent_as_short = (giganticized-2147483648)>>16;  // now a fraction between 0 and 32767 representing 0->1. this is decoupled from any fixed_point definition

    unsigned long long ff = (bitsA<<15) + interp_percent_as_short * (bitsB-bitsA);  // our interpolation using interp_percent_as_int. everything shifted by 15 bits now that it's been multed by interp_percent_as_int (too big by factor of 32768)
    fixed_point finalFixed = ff >> 15;  // the value from previous fixed-point math produces a value 32768x too high. Divide by 32768 to make it a standard fixed_point. our final fixed value of 1/sqrt(lengthSquared)

    return finalFixed;
}

79487627