64 bits double representation:

http://en.wikipedia.org/wiki/IEEE_754-1985

What is the real number represented inside a double type?

The 64-bit double type uses 1 sign bit, 11 exponent bits (stored with a bias of 1023) and 52 mantissa (fraction) bits.

A normalized number is represented in base 2 as:

(1 + 0.xxx) * 2^exponent

where the leading 1 is implicit (it is not stored) and xxx are the mantissa digits in binary (base 2).

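To convert the fractional binary number to decimal, we sum the place values of the mantissa digits that are 1 (the k-th digit after the binary point is worth 1/2^k) plus the implicit 1. For example, when the second and third digits are 1: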

     1      1
1 + --- + ---- ....
    2^2    2^3

Sample:

The number 0.15625.

 0                                                                 63
 |        mantissa                                    | exp         | signal
 0000000000000000000000000000000000000000000000000010 | 00111111110 | 0

double   = 0.15625
mantissa = 1125899906842624
exponent = -3
signal   = 0

The second digit of the mantissa is 1, so we include its place value (1/2^2) in the sum together with the implicit 1.

         1       -3
 ( 1 +  --- ) * 2 
        2^2 

=>
        1      1
 ( 1 + ---) * --- 
        4      8
=>
  1      1  
  -- +  --- 
  8      32 
=>
  4  + 1  
  ------- 
     32

=>
      5  
  ------- = 0.15625 (exactly)
     32

Some numbers

See Limits on Floating-Point Constants


Maximum representable floating-point number.

DBL_MAX


bits =
1111111111111111111111111111111111111111111111111111011111111110

binary number =
1.1111111111111111111111111111111111111111111111111111 * 2 ^ 1023

double   = 1.79769e+308
mantissa = 4503599627370495
exponent = 1023
signal   = 0
representation :
(2 ^ 1023 + 2 ^ 1022 + 2 ^ 1021 + 2 ^ 1020 + 2 ^ 1019 + 2 ^ 1018 + 2 ^ 1017
 + 2 ^ 1016 + 2 ^ 1015 + 2 ^ 1014 + 2 ^ 1013 + 2 ^ 1012 + 2 ^ 1011 + 2 ^ 1010
 + 2 ^ 1009 + 2 ^ 1008 + 2 ^ 1007 + 2 ^ 1006 + 2 ^ 1005 + 2 ^ 1004 + 2 ^ 1003
 + 2 ^ 1002 + 2 ^ 1001 + 2 ^ 1000 + 2 ^ 999 + 2 ^ 998 + 2 ^ 997 + 2 ^ 996
 + 2 ^ 995 + 2 ^ 994 + 2 ^ 993 + 2 ^ 992 + 2 ^ 991 + 2 ^ 990 + 2 ^ 989
 + 2 ^ 988 + 2 ^ 987 + 2 ^ 986 + 2 ^ 985 + 2 ^ 984 + 2 ^ 983 + 2 ^ 982
 + 2 ^ 981 + 2 ^ 980 + 2 ^ 979 + 2 ^ 978 + 2 ^ 977 + 2 ^ 976 + 2 ^ 975
 + 2 ^ 974 + 2 ^ 973 + 2 ^ 972 + 2 ^ 971)


Decimal:

179769313486231570814527423731704356798070567525844996598917476803157260 ...
780028538760589558632766878171540458953514382464234321326889464182768467 ...
546703537516986049910576551282076245490090389328944075868508455133942304 ...
583236903222948165808559332123348274797826204144723168738177180919299881 ...
250404026184124858368


Smallest positive number x, such that x + 1.0 is not equal to 1.0.

DBL_EPSILON

bits =
0000000000000000000000000000000000000000000000000000110100111100

binary number =
1.0000000000000000000000000000000000000000000000000000 * 2 ^ -52

double   = 2.22045e-016
mantissa = 0
exponent = -52
signal   = 0
representation :
(2 ^ -52)


       1
----------------
4503599627370496


Minimum positive normalized value.

DBL_MIN

bits =
0000000000000000000000000000000000000000000000000000100000000000

binary number =
1.0000000000000000000000000000000000000000000000000000 * 2 ^ -1022

double   = 2.22507e-308
mantissa = 0
exponent = -1022
signal   = 0
representation :
(2 ^ -1022)


1/4494232837155789769323262976972561834044942447355766431835752028943316...
895137524078317711933060188400528002846996784833941469744220360415562321...
185765986853109444197335621637131907555490031152352986327073802125144220...
953767058561572036847827763520680929083762767114657455998681148461992907...
6208839082406056034304


Infinity and NaN (exponent field all ones)

bits =
0000000000000000000000000000000000000000000000000000111111111110

binary number =
1.0000000000000000000000000000000000000000000000000000 * 2 ^ 1024

double   = 1.#INF
mantissa = 0
exponent = 1024
signal   = 0
representation :
(2 ^ 1024)

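An exponent of 1024 (all exponent bits set) is reserved: with a zero mantissa, as in this sample, it encodes infinity; with a non-zero mantissa it encodes NaN. The 2 ^ 1024 printed above is therefore not an actual value.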



Second biggest number:

bits =
0111111111111111111111111111111111111111111111111111011111111110

binary number =
1.1111111111111111111111111111111111111111111111111110 * 2 ^ 1023

double   = 1.79769e+308
mantissa = 4503599627370494
exponent = 1023
signal   = 0
representation :
(2 ^ 1023 + 2 ^ 1022 + 2 ^ 1021 + 2 ^ 1020 + 2 ^ 1019 + 2 ^ 1018 + 2 ^ 1017
 + 2 ^ 1016 + 2 ^ 1015 + 2 ^ 1014 + 2 ^ 1013 + 2 ^ 1012 + 2 ^ 1011 + 2 ^ 1010
 + 2 ^ 1009 + 2 ^ 1008 + 2 ^ 1007 + 2 ^ 1006 + 2 ^ 1005 + 2 ^ 1004 + 2 ^ 1003
 + 2 ^ 1002 + 2 ^ 1001 + 2 ^ 1000 + 2 ^ 999 + 2 ^ 998 + 2 ^ 997 + 2 ^ 996
 + 2 ^ 995 + 2 ^ 994 + 2 ^ 993 + 2 ^ 992 + 2 ^ 991 + 2 ^ 990 + 2 ^ 989
 + 2 ^ 988 + 2 ^ 987 + 2 ^ 986 + 2 ^ 985 + 2 ^ 984 + 2 ^ 983 + 2 ^ 982
 + 2 ^ 981 + 2 ^ 980 + 2 ^ 979 + 2 ^ 978 + 2 ^ 977 + 2 ^ 976 + 2 ^ 975
 + 2 ^ 974 + 2 ^ 973 + 2 ^ 972)


1797693134862315508561243283845062402343434371574593359244048724485818...
457545561143884706399431262203219608040271573715708098528849645117430...
440876627676009095943319277282370788761887605795325637686986540648252...
621157710157914639830148577040081234194593862451417237031480975291084...
23358883457665451722744025579520

Distance between first and second biggest numbers:

1995840309534719811656372713036838566067451260435457541502547242437211...
8918689640657849579654926357010893424468441924952439724379883935936607...
3917179828483142032000567295108567651753772144436298718265335674454392...
3993330810455120870388888855268448044157507120906875756041642358495230...
3440099278848

Source code

Include

memory

limits

// Prints the header line "bits = " followed by one character per bit of v:
// '1' when memory::is_bit_on(v, index) is true, '0' otherwise. Indices run
// from 0 up to sizeof(T) * CHAR_BIT - 1, so index 0 is written first, and
// the row of digits is terminated with a newline.
// NOTE(review): index 0 is presumably the least significant bit;
// memory::is_bit_on is defined outside this file - confirm there.
template<class T>
void PrintBitsLn(T v)
{
    // Total number of bits in T, kept as int to match the loop counter.
    const int bitCount = static_cast<int>(sizeof(T) * CHAR_BIT);

    std::cout << "bits = " << std::endl;

    for (int bit = 0; bit != bitCount; ++bit)
    {
        const char digit = memory::is_bit_on(v, bit) ? '1' : '0';
        std::cout << digit;
    }

    std::cout << std::endl;
}


// Prints the header line "binary number = " followed by the value encoded
// in the IEEE 754 double bit pattern u, written as
//
//     [-]<leading digit>.<52 mantissa bits> * 2 ^ <exponent>
//
// and a trailing blank line.
//
// u: raw 64-bit pattern of a double (bits 0..51 mantissa, bits 52..62
//    biased exponent, bit 63 sign).
//
// Normalized numbers have an implicit leading 1 and an exponent of
// (exponent field - 1023). A zero exponent field encodes zero and the
// subnormal numbers: there the leading digit is 0 and the exponent is
// fixed at -1022.
// NOTE(review): an all-ones exponent field (infinity / NaN) is not
// special-cased and is printed as "1.xxx * 2 ^ 1024".
void PrintBinary(unsigned long long u)
{
    std::cout << "binary number = " << std::endl;

    // Convert to a signed type before removing the bias so the subtraction
    // never wraps around in unsigned arithmetic.
    const long long biasedExponent =
        static_cast<long long>(memory::getbits(u, 52, 11));
    const unsigned long long signal = memory::getbits(u, 64 - 1, 1);

    const bool hasImplicitOne = (biasedExponent != 0);
    const long long exponent = hasImplicitOne ? biasedExponent - 1023 : -1022;

    // The sign field is a single bit, so a negative number is flagged by 1.
    // (Comparing the unsigned field against -1 could never be true.)
    if (signal == 1)
        std::cout << "-";

    std::cout << (hasImplicitOne ? "1." : "0.");

    // Mantissa digits, most significant (bit 51) first.
    for (int i = 51; i >= 0; i--)
    {
        std::cout << (memory::is_bit_on(u, i) ? '1' : '0');
    }

    std::cout << " * 2 ^ " << exponent << std::endl;
    std::cout << std::endl;
}


// Dumps how the double d is stored and which sum of powers of two it
// represents. Output, in order:
//   - the raw bits (PrintBitsLn) and the binary scientific form (PrintBinary)
//   - "double   = ", "mantissa = ", "exponent = ", "signal   = " lines, where
//     mantissa is the 52-bit field, exponent is (exponent field - 1023) and
//     signal is the sign bit
//   - "0" for positive or negative zero, otherwise a "representation :"
//     header followed by an optional " -1 * " factor and the sum
//     "(2 ^ a + 2 ^ b + ...)" with one term per contributing bit.
//
// Subnormal numbers (zero exponent field, non-zero mantissa) have no
// implicit leading 1 and use a fixed scale of 2 ^ -1022; their
// "exponent = " line still shows -1023, i.e. the raw field minus the bias.
// NOTE(review): infinity and NaN (exponent == 1024) are not special-cased;
// they are printed like a normalized number, so the sum shown for them is
// not a real value.
void Frac(double d)
{
    //http://en.wikipedia.org/wiki/IEEE_754-1985

    static_assert(sizeof(d) * CHAR_BIT == 64, "expected double with 64 bits");
    static_assert(sizeof(unsigned long long) == sizeof(d),
                  "expected unsigned long long to have the size of double");

    // Copy the object representation byte by byte. Reading the double
    // through an unsigned long long pointer would break the aliasing rules;
    // access through unsigned char is always permitted.
    unsigned long long u = 0;
    {
        const unsigned char* from = reinterpret_cast<const unsigned char*>(&d);
        unsigned char* to = reinterpret_cast<unsigned char*>(&u);
        for (unsigned k = 0; k < sizeof(d); ++k)
        {
            to[k] = from[k];
        }
    }

    const unsigned long long mantissa = memory::getbits(u, 0, 52);
    // Convert to a signed type before removing the bias so the subtraction
    // never wraps around in unsigned arithmetic.
    const long long exponent =
        static_cast<long long>(memory::getbits(u, 52, 11)) - 1023;
    const unsigned long long signal = memory::getbits(u, 64 - 1, 1);

    PrintBitsLn(u);
    std::cout << std::endl;
    PrintBinary(u);

    std::cout << "double   = " << d << std::endl;
    std::cout << "mantissa = " << mantissa << std::endl;
    std::cout << "exponent = " << exponent << std::endl;
    std::cout << "signal   = " << signal << std::endl;

    // A zero exponent field means zero (mantissa == 0) or a subnormal.
    const bool exponentFieldIsZero = (exponent == -1023);

    if (exponentFieldIsZero && mantissa == 0)
    {
        std::cout << "0" << std::endl;
        return;
    }

    std::cout << "representation :" << std::endl;

    // The sign factor belongs in front of the sum, after the header line.
    if (signal == 1)
    {
        std::cout << " -1 * ";
    }

    // Mantissa bit i is worth 2 ^ (scale - (52 - i)).
    const long long scale = exponentFieldIsZero ? -1022 : exponent;
    bool needPlus = false;

    std::cout << "(";

    // Normalized numbers contribute the implicit leading 1, i.e. 2 ^ exponent.
    if (!exponentFieldIsZero)
    {
        std::cout << "2 ^ " << exponent;
        needPlus = true;
    }

    for (int i = 51; i >= 0; i--)
    {
        if (memory::is_bit_on(u, i))
        {
            if (needPlus)
            {
                std::cout << " + ";
            }

            std::cout << "2 ^ " << (scale - (52 - i));
            needPlus = true;
        }
    }

    std::cout << ")" << std::endl;
}