fp64-to-fp32-conversion.shader_test

# Software conversion from double precision (fp64) to single precision (fp32).
# The conversion follows the IEEE 754 rounding rules.

[require]
GLSL >= 1.30

[vertex shader]
#version 130

/* Pass-through vertex shader: forwards the incoming vertex position as is. */
void main()
{
    vec4 position = gl_Vertex;

    gl_Position = position;
}

[fragment shader]
#version 130

/* Software IEEE floating-point rounding modes. */
const uint float_round_nearest_even = 0u;
const uint float_round_to_zero      = 1u;
const uint float_round_down         = 2u;
const uint float_round_up           = 3u;

/* Rounding mode used by roundAndPackFloat32().  It is initialized explicitly
 * because GLSL leaves global variables without an initializer undefined on
 * entry to main(); round-to-nearest-even is the IEEE default mode.
 */
uint float_rounding_mode = float_round_nearest_even;

/* Assembles a single-precision bit pattern from the sign `zSign', the
 * exponent `zExp' and the significand `zFrac'.  Each field is shifted to its
 * position and the three are then ADDED, not OR-ed, so any integer portion of
 * `zFrac' (bit 23 and above) carries into the exponent field.  A complete,
 * normalized significand has an integer portion of 1; callers passing one
 * must therefore supply a `zExp' that is 1 less than the desired exponent.
 */
uint
packFloat32( uint zSign, uint zExp, uint zFrac )
{
    uint signField = zSign << 31;
    uint expField = zExp << 23;

    return signField + expField + zFrac;
}

/* Shifts `a' right by `count' bits with "jamming": if any nonzero bit is
 * shifted out, the least significant bit of the result is set to 1 so that
 * the loss stays visible to later rounding.  `count' may be arbitrarily
 * large; for 32 or more the result is 1 when `a' is nonzero and 0 otherwise.
 * The result is written to the inout parameter `zPtr'.
 */
void
shift32RightJamming( uint a, int count, inout uint zPtr )
{
    if ( count == 0 ) {
        /* Nothing is shifted out. */
        zPtr = a;
        return;
    }
    if ( count >= 32 ) {
        /* Every bit is shifted out; only the sticky bit remains. */
        zPtr = ( a != 0u ) ? 1u : 0u;
        return;
    }

    /* The bits that fall off the low end, moved to the top of a word. */
    uint lostBits = a << ( ( - count ) & 31 );
    uint sticky = ( lostBits != 0u ) ? 1u : 0u;

    zPtr = ( a >> count ) | sticky;
}

/* Shifts the 64-bit value whose high word is `a.x' and whose low word is
 * `a.y' right by `count' bits with "jamming": if any nonzero bit is shifted
 * out, the least significant bit of the result is set to 1.  `count' may be
 * arbitrarily large; for 64 or more the result is 1 when the input is nonzero
 * and 0 otherwise.  The high word of the result is written to `z0Ptr' and the
 * low word to `z1Ptr'.
 */
void
shift64RightJamming( uvec2 a,
                     int count,
                     inout uint z0Ptr,
                     inout uint z1Ptr )
{
    uint hi = a.x;
    uint lo = a.y;
    /* Left-shift amount that isolates the bits dropped by a right shift of
     * `count' modulo 32.
     */
    int leftShift = ( - count ) & 31;
    uint outHi = 0u;
    uint outLo;

    if ( count == 0 ) {
        outHi = hi;
        outLo = lo;
    } else if ( count < 32 ) {
        uint sticky = uint ( ( lo << leftShift ) != 0u );

        outHi = hi >> count;
        outLo = ( hi << leftShift ) | ( lo >> count ) | sticky;
    } else if ( count == 32 ) {
        /* The high word moves down whole; the low word only feeds the
         * sticky bit.
         */
        outLo = hi | uint ( lo != 0u );
    } else if ( count < 64 ) {
        uint sticky = uint ( ( ( hi << leftShift ) | lo ) != 0u );

        outLo = ( hi >> ( count & 31 ) ) | sticky;
    } else {
        /* Everything is shifted out. */
        outLo = uint ( ( hi | lo ) != 0u );
    }

    z1Ptr = outLo;
    z0Ptr = outHi;
}

/* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 * and significand `zFrac', and returns the single-precision bit pattern
 * corresponding to it, rounded according to `float_rounding_mode'.
 *
 * If the value is too large, an infinity or the largest finite value is
 * returned, depending on the rounding direction.  If it is too small, it is
 * rounded to a subnormal number (or zero).  This version does not record
 * IEEE exception flags.
 *
 * `zFrac' has its binary point between bits 30 and 29, which is 7 bits to the
 * left of the usual location; the low 7 bits are the bits to be rounded off.
 * This shifted significand must be normalized or smaller.  If `zFrac' is not
 * normalized, `zExp' must be 0; in that case, the result returned is a
 * subnormal number, and it must not require rounding.  In the usual case that
 * `zFrac' is normalized, `zExp' must be 1 less than the "true" floating-point
 * exponent.
 *
 * `zExp' is declared uint but carries a SIGNED exponent in two's complement:
 * a caller that subtracts a bias in unsigned arithmetic passes a wrapped
 * value for tiny inputs.  It is therefore reinterpreted as int below; testing
 * it as uint would make every negative exponent look like an overflow.
 */
uint
roundAndPackFloat32( uint zSign, uint zExp, uint zFrac )
{
    uint roundingMode;
    uint roundNearestEven;
    uint roundIncrement;
    uint roundBits;
    /* int(uint) keeps the bit pattern, so wrapped values become negative. */
    int sExp = int ( zExp );

    roundingMode = float_rounding_mode;
    roundNearestEven = uint ( roundingMode == float_round_nearest_even );
    /* Half of the last kept bit: round to nearest. */
    roundIncrement = 0x40u;
    if ( roundNearestEven == 0u ) {
        if ( roundingMode == float_round_to_zero ) {
            roundIncrement = 0u;
        } else {
            /* Directed rounding: round the magnitude up unless that would
             * move the value away from the requested direction.
             */
            roundIncrement = 0x7Fu;
            if ( zSign != 0u ) {
                if ( roundingMode == float_round_up ) {
                    roundIncrement = 0u;
                }
            } else {
                if ( roundingMode == float_round_down ) {
                    roundIncrement = 0u;
                }
            }
        }
    }
    roundBits = zFrac & 0x7Fu;
    /* Exponent outside the normal range: either negative or >= 0xFD. */
    if ( ( sExp < 0 ) || ( 0xFD <= sExp ) ) {
        /* Overflow: the exponent is too large, or it is the largest one and
         * rounding carries out of the significand (bit 31 becomes set, which
         * shows up as a negative value when viewed as int).
         */
        if ( ( 0xFD < sExp ) ||
            ( ( sExp == 0xFD ) &&
              ( int ( zFrac + roundIncrement ) < 0 ) ) ) {
            /* Infinity, or the largest finite value (infinity - 1) when the
             * rounding direction never increases the magnitude.
             */
            return packFloat32(
                zSign, 0xFFu, 0u ) - uint ( roundIncrement == 0u );
        }
        if ( sExp < 0 ) {
            /* Underflow: denormalize, keeping a sticky bit for rounding. */
            shift32RightJamming( zFrac, - sExp, zFrac );
            sExp = 0;
            roundBits = zFrac & 0x7Fu;
        }
    }
    zFrac = ( zFrac + roundIncrement )>>7;
    /* Exact tie in nearest-even mode: clear the low bit to land on even. */
    zFrac &= ~ ( uint ( ( roundBits ^ 0x40u ) == 0u ) & roundNearestEven );
    if ( zFrac == 0u ) {
        sExp = 0;
    }
    return packFloat32( zSign, uint ( sExp ), zFrac );
}


/* Returns the 52 fraction bits of the double-precision value `a': the low
 * 20 bits of the high word `a.x' and the whole low word `a.y'.
 */
uvec2
extractFloat64Frac( uvec2 a )
{
    uvec2 frac = a;

    frac.x &= 0x000FFFFFu;
    return frac;
}

/* Returns the 11 exponent bits of the double-precision value `a', taken from
 * bits 20..30 of the high word `a.x'.
 */
uint
extractFloat64Exp( uvec2 a )
{
    uint expField = a.x & 0x7FF00000u;

    return expField >> 20;
}

/* Returns the sign bit of the double-precision value `a' (bit 31 of the high
 * word `a.x') as 0 or 1.
 */
uint
extractFloat64Sign( uvec2 a )
{
    return uint ( ( a.x & 0x80000000u ) != 0u );
}

/* Returns the result of converting the double-precision floating-point value
 * `a' to the single-precision floating-point format.  `a.x' holds the high
 * 32 bits of the double and `a.y' the low 32 bits; the return value is the
 * raw single-precision bit pattern.  The conversion is performed according
 * to the IEEE Standard for Floating-Point Arithmetic.
 */
uint
fp64_to_fp32( uvec2 a )
{
    uint aSign;
    uint aExp;
    /* Both are passed as inout arguments below, so give them defined values
     * before the call copies them in.
     */
    uint zFrac = 0u;
    uint allZero = 0u;
    uvec2 aFrac;

    aFrac = extractFloat64Frac( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FFu ) {
        if ( ( aFrac.x | aFrac.y ) != 0u ) {
            /* NaN: return a quiet NaN that keeps the sign and the top
             * fraction bits of the input as payload.
             */
            return ( aSign<<31 ) | 0x7FC00000u |
                ( ( aFrac.x & 0x000FFFFFu )<<3 ) | ( aFrac.y>>29 );
        }
        /* Infinity keeps its sign. */
        return packFloat32( aSign, 0xFFu, 0u );
    }
    /* Reduce the 52-bit fraction to 30 bits (binary point between bits 30
     * and 29), folding the 22 dropped bits into a sticky bit.  The high word
     * of the shifted value is always zero and is ignored.
     */
    shift64RightJamming( aFrac, 22, allZero, zFrac );
    if ( aExp != 0u ) {
        /* Normal number: restore the implicit leading 1. */
        zFrac |= 0x40000000u;
    }
    /* 0x381 = (1023 - 127) + 1: re-bias the exponent and subtract the extra 1
     * that roundAndPackFloat32() expects for a normalized significand.  For
     * exponents below 0x381 the unsigned subtraction wraps around, so the
     * value passed is a negative exponent in two's complement.
     */
    return roundAndPackFloat32( aSign, aExp - 0x381u, zFrac );
}

/* Raw bits of the double-precision input: a.x is the high word, a.y the low
 * word.
 */
uniform uvec2 a;
/* Raw bits of the single-precision value the conversion should produce. */
uniform uint expected;

void main()
{
    /* Generate green if the expected value is produced, red otherwise. */
    const vec4 passColor = vec4(0.0, 1.0, 0.0, 1.0);
    const vec4 failColor = vec4(1.0, 0.0, 0.0, 1.0);

    if (fp64_to_fp32(a) == expected) {
        gl_FragColor = passColor;
    } else {
        gl_FragColor = failColor;
    }
}

[test]
# Each case below sets the uniforms, draws a rectangle that covers the whole
# window ('draw rect'), and verifies that every pixel is green ('probe all'),
# i.e. that fp64_to_fp32(a) produced the value in 'expected'.
#
# 'a' holds the raw bits of the double-precision input: the first component
# is the high 32 bits (sign, exponent, top 20 fraction bits) and the second
# component is the low 32 bits.  'expected' holds the raw bits of the
# single-precision result.

# Try +Inf (exponent all ones, fraction zero)
uniform uvec2 a       0x7FF00000 0x00000000
uniform uint expected 0x7F800000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try -Inf (same as +Inf with the sign bit set)
uniform uvec2 a       0xFFF00000 0x00000000
uniform uint expected 0xFF800000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try 50 (exactly representable in single precision)
uniform uvec2 a       0x40490000 0x00000000
uniform uint expected 0x42480000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try -50
uniform uvec2 a       0xC0490000 0x00000000
uniform uint expected 0xC2480000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try 1
uniform uvec2 a       0x3FF00000 0x00000000
uniform uint expected 0x3F800000
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0

# Try 0.4 (not exactly representable; the expected value is the
# round-to-nearest result, which rounds the fraction up)
uniform uvec2 a       0x3FD99999 0x9999999A
uniform uint expected 0x3ECCCCCD
draw rect -1 -1 2 2
probe all rgba 0.0 1.0 0.0 1.0