-
Notifications
You must be signed in to change notification settings - Fork 0
/
io_utils.go
213 lines (176 loc) · 6.64 KB
/
io_utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
package gandalff
import (
"encoding/binary"
"fmt"
"math"
"regexp"
"strings"
"time"
)
// DivMod returns the floored quotient q and remainder r of n divided by d,
// mirroring Python's built-in divmod: q*d + r == n, and r is either zero or
// has the same sign as the divisor d.
//
// Go's / and % operators truncate toward zero, so the truncated result is
// corrected whenever the remainder is non-zero and its sign differs from the
// sign of d. The signs are compared directly instead of testing n*d < 0,
// because that product can overflow int64 and yield the wrong sign.
//
// A zero divisor triggers Go's integer-divide-by-zero runtime panic.
func DivMod(n, d int64) (q, r int64) {
	q = n / d
	r = n % d
	// A non-zero truncated remainder carries the sign of n, so a sign mismatch
	// with d means exactly one of the numerator and denominator is negative.
	if r != 0 && (r < 0) != (d < 0) {
		q--
		r += d
	}
	return q, r
}
// SasFloat holds the raw bytes of a SAS floating point number in IBM format.
// Its ToIeee and FromIeee methods convert between these bytes and an IEEE 754
// float64.
//
// SAS supports 27 special missing values, allowing the categorization of
// missing data by tagging or labeling missing values using the letters A to Z
// or an underscore.
type SasFloat []byte
// SpecialMissingValueRegex matches text consisting of exactly one tag
// character (A to Z or underscore) followed by seven NUL bytes.
var SpecialMissingValueRegex = regexp.MustCompile("^[A-Z_]\x00\x00\x00\x00\x00\x00\x00$")
// IsIbmSpecialMissingValue reports whether ieee carries the bit pattern of a
// special missing value: viewed as eight big-endian bytes, the first byte is a
// tag character (A to Z or underscore) and the remaining seven bytes are zero.
func IsIbmSpecialMissingValue(ieee float64) bool {
	bits := math.Float64bits(ieee)

	var raw [8]byte
	binary.BigEndian.PutUint64(raw[:], bits)

	tag := raw[0]
	if tag != '_' && (tag < 'A' || tag > 'Z') {
		return false
	}
	// The low 56 bits are the seven bytes that follow the tag byte.
	return bits&0x00ffffffffffffff == 0
}
// NewSasFloat wraps b as a SasFloat and returns a pointer to it. The bytes are
// not copied, so the result shares b's backing array.
func NewSasFloat(b []byte) *SasFloat {
	wrapped := new(SasFloat)
	*wrapped = b
	return wrapped
}
// ToIeee decodes the receiver, an IBM-format floating point number, into an
// IEEE 754 64-bit float.
//
//	IBM mainframe: sign * 0.mantissa * 16 ** (exponent - 64)
//	IEEE:          sign * 1.mantissa * 2 ** (exponent - 1023)
//
// A receiver shorter than 8 bytes is zero-padded on the right before decoding.
// The padding happens on a private copy, so the receiver and any buffer it was
// sliced from are never modified. byteOrder is used to read the first 8 bytes
// as one unsigned 64-bit integer.
//
// When the 56 mantissa bits are all zero, the first byte selects the result:
//   - 0x00 or 0x80 decodes to 0;
//   - '.' decodes to NaN;
//   - a letter A to Z or an underscore decodes to the float whose bits are the
//     top 16 bits of the decoded integer (with big-endian input this is the
//     pattern IsIbmSpecialMissingValue reports);
//   - any other first byte yields an error.
//
// NOTE: the mantissa alignment below assumes a normalized IBM number, i.e. a
// non-zero leading hexadecimal digit in the mantissa.
func (sf *SasFloat) ToIeee(byteOrder binary.ByteOrder) (float64, error) {
	ibm := *sf
	if len(ibm) < 8 {
		// Pad into a fresh buffer. Appending to the receiver's slice could
		// write zeros into spare capacity of its backing array and clobber
		// whatever follows it there (e.g. the next field of a record).
		padded := make(SasFloat, 8)
		copy(padded, ibm)
		ibm = padded
	}

	// Parse the 64 bits of the IBM float as one 8-byte unsigned integer.
	ulong := byteOrder.Uint64(ibm)

	// IBM: 1-bit sign, 7-bits exponent, 56-bits mantissa.
	sign := int64(ulong & 0x8000000000000000)
	exponent := int64(ulong&0x7f00000000000000) >> 56
	mantissa := ulong & 0x00ffffffffffffff

	if mantissa == 0 {
		switch first := ibm[0]; {
		case first == 0 || first == 0x80:
			return 0.0, nil
		case first == '.':
			return math.NaN(), nil
		case (first >= 'A' && first <= 'Z') || first == '_':
			return math.Float64frombits(ulong & 0xffff000000000000), nil
		default:
			return 0.0, fmt.Errorf("neither \"true\" zero nor NaN: %s", ibm)
		}
	}

	// IBM-format exponent is base 16, so the mantissa can have up to 3
	// leading zero-bits in the binary mantissa. IEEE format exponent is
	// base 2, so we don't need any leading zero-bits and shift accordingly.
	// This is one of the criticisms of IBM-format, its wobbling precision.
	var shift uint
	switch {
	case ulong&0x0080000000000000 != 0:
		shift = 3
	case ulong&0x0040000000000000 != 0:
		shift = 2
	case ulong&0x0020000000000000 != 0:
		shift = 1
	}
	mantissa >>= shift

	// Clear the 1 bit to the left of the binary point; it is implicit in the
	// IEEE representation.
	mantissa &= 0xffefffffffffffff

	// IBM exponent is excess 64, but we subtract 65 because of the implicit
	// 1 left of the radix point for the IEEE mantissa.
	exponent -= 65
	// IBM exponent is base 16, IEEE is base 2, so we multiply by 4.
	exponent <<= 2
	// IEEE exponent is excess 1023, and we also increment for each right-shift
	// made when aligning the mantissa's first 1-bit.
	exponent += int64(shift) + 1023

	// IEEE: 1-bit sign, 11-bits exponent, 52-bits mantissa. The sign bit was
	// never shifted, so it is already in the right spot.
	return math.Float64frombits(uint64(sign|exponent<<52) | mantissa), nil
}
// FromIeee encodes the IEEE 754 value ieee as an 8-byte IBM-format floating
// point number and stores the bytes, written with byteOrder, in the receiver.
//
//	IEEE:          sign * 1.mantissa * 2 ** (exponent - 1023)
//	IBM mainframe: sign * 0.mantissa * 16 ** (exponent - 64)
//
// Special inputs:
//   - zero is stored as eight zero bytes, the "true zero" of the IBM format;
//   - NaN is stored as '.' followed by seven zero bytes. The IBM format has no
//     NaN, so SAS uses alternative zero encodings (zero mantissa, non-zero
//     first byte) for missing values, with '.' as the default;
//   - a value whose byteOrder encoding starts with a tag byte (A to Z or
//     underscore) and whose low 56 bits are zero is stored as that tag byte
//     followed by seven zero bytes, i.e. as a special missing value. With a
//     big-endian byteOrder this also applies to ordinary finite values with
//     such a bit pattern, for example 131072;
//   - infinities, and binary exponents above 248 or below -260, return an
//     error and leave the receiver untouched.
func (sf *SasFloat) FromIeee(ieee float64, byteOrder binary.ByteOrder) error {
	switch {
	case ieee == 0.0:
		*sf = make(SasFloat, 8)
		return nil
	case math.IsNaN(ieee):
		*sf = SasFloat([]byte{'.', 0, 0, 0, 0, 0, 0, 0})
		return nil
	case math.IsInf(ieee, 0):
		return fmt.Errorf("cannot convert infinity")
	}

	bits := math.Float64bits(ieee)

	// Special missing values: inspect the first byte of the value as laid out
	// by byteOrder.
	probe := make([]byte, 8)
	byteOrder.PutUint64(probe, bits)
	lead := probe[0]
	isTag := lead == '_' || (lead >= 'A' && lead <= 'Z')
	if isTag && bits&0x00ffffffffffffff == 0 {
		*sf = SasFloat([]byte{lead, 0, 0, 0, 0, 0, 0, 0})
		return nil
	}

	// IEEE: 1-bit sign, 11-bits excess-1023 exponent, 52-bits significand.
	exp2 := int64((bits>>52)&0x7ff) - 1023
	if exp2 > 248 {
		return fmt.Errorf("cannot store magnitude more than ~ 16 ** 63 as IBM-format")
	}
	if exp2 < -260 {
		return fmt.Errorf("cannot store magnitude less than ~ 16 ** -65 as IBM-format")
	}

	// IEEE keeps an implicit 1 left of the radix point (1.significand) while
	// IBM keeps an implicit 0 (0.significand), so make the leading 1 explicit.
	frac := int64(bits&0x000fffffffffffff) | 0x0010000000000000

	// 16 ** x == 2 ** (4 * x): divide the binary exponent by 4 and fold the
	// remainder into the mantissa so no information is lost.
	exp16, rem := DivMod(exp2, 4)
	frac <<= rem

	// +1 accounts for the now-explicit leading bit, +64 for the excess-64
	// encoding of IBM exponents.
	exp16 += 1 + 64

	// IBM: 1-bit sign, 7-bits exponent, 56-bits mantissa. The IEEE sign bit
	// already sits in the top position.
	signBit := int64(bits & 0x8000000000000000)

	// We lose some precision, but who said floats were perfect?
	out := make([]byte, 8)
	byteOrder.PutUint64(out, uint64(signBit|exp16<<56|frac))
	*sf = SasFloat(out)
	return nil
}
// This functions formats the date/time in SAS format, e.g. 28OCT23:14:46:32
func formatDateTimeSAS(t time.Time) string {
return fmt.Sprintf(
"%02d%s%d:%02d:%02d:%02d",
t.Day(),
strings.ToUpper(t.Month().String()[:3]),
t.Year()%100,
t.Hour(), t.Minute(), t.Second())
}