123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268 |
- /*************************************************************************
- * Copyright (C) [2020] by Cambricon, Inc. All rights reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *************************************************************************/
- #include "half.h"
- inline bool diffNotMuch(half a, half b) {
- return true;
- float c = static_cast<float>(a);
- float d = static_cast<float>(b);
- if (c == 0 || d == 0) {
- return true;
- }
- c = c > 0 ? c : 0;
- d = d > 0 ? d : 0;
- if ((c / d) > DIFF_SCALE && (c / d) > DIFF_SCALE) {
- return false;
- }
- return true;
- }
- half::half() {}
- half::~half() {}
- half::half(const float a) { data_ = float2half(a); }
- // Data Cast
- half::operator int() {
- float a = half::half2float(data_);
- return static_cast<int>(a);
- }
- half::operator float() {
- float a = half::half2float(data_);
- return a;
- }
- half::operator double() {
- float a = half::half2float(data_);
- return static_cast<double>(a);
- }
- std::ostream& operator<<(std::ostream& output, const half& c) {
- float data_f = half::half2float(c.data_);
- output << data_f;
- return output;
- }
- std::istream& operator>>(std::istream& input, half& c) {
- float data_f;
- input >> data_f;
- c.data_ = half::float2half(data_f);
- return input;
- }
- half operator+(const int& a, const half& b) {
- float c = a;
- float b_f = half::half2float(b.data_);
- float result = c + b_f;
- result = result > HALF_MAX ? HALF_MAX : result;
- result = result < HALF_MIN ? HALF_MIN : result;
- return result;
- }
- half& half::operator=(const half& a) {
- data_ = a.data_;
- return *this;
- }
- half half::operator-(void) { return (half(0) - *this); }
- half half::operator+(const half& a) {
- assert(diffNotMuch(*this, a));
- float data_f = half2float(data_);
- float a_f = half2float(a.data_);
- float result = data_f + a_f;
- result = result > HALF_MAX ? HALF_MAX : result;
- result = result < HALF_MIN ? HALF_MIN : result;
- return result;
- }
- half half::operator-(const half& a) {
- assert(diffNotMuch(*this, a));
- float data_f = half2float(data_);
- float a_f = half2float(a.data_);
- float result = data_f - a_f;
- result = result > HALF_MAX ? HALF_MAX : result;
- result = result < HALF_MIN ? HALF_MIN : result;
- return result;
- }
- half half::operator*(const half& a) {
- assert(diffNotMuch(*this, a));
- float data_f = half2float(data_);
- float a_f = half2float(a.data_);
- float result = data_f * a_f;
- result = result > HALF_MAX ? HALF_MAX : result;
- result = result < HALF_MIN ? HALF_MIN : result;
- return result;
- }
- half half::operator/(const half& a) {
- assert(diffNotMuch(*this, a));
- float data_f = half2float(data_);
- float a_f = half2float(a.data_);
- float result = data_f / a_f;
- result = result > HALF_MAX ? HALF_MAX : result;
- result = result < HALF_MIN ? HALF_MIN : result;
- return result;
- }
- half& half::operator+=(const half& a) {
- assert(diffNotMuch(*this, a));
- half result = *this + a;
- data_ = result.data_;
- return *this;
- }
- half& half::operator-=(const half& a) {
- assert(diffNotMuch(*this, a));
- half result = *this - a;
- data_ = result.data_;
- return *this;
- }
- half& half::operator*=(const half& a) {
- assert(diffNotMuch(*this, a));
- half result = *this * a;
- data_ = result.data_;
- return *this;
- }
- half& half::operator/=(const half& a) {
- assert(diffNotMuch(*this, a));
- half result = *this / a;
- data_ = result.data_;
- return *this;
- }
- bool half::operator<(const half& a) {
- float data_f = half2float(this->data_);
- float a_f = half2float(a.data_);
- return data_f < a_f ? true : false;
- }
- bool half::operator<=(const half& a) {
- float data_f = half2float(this->data_);
- float a_f = half2float(a.data_);
- return data_f <= a_f ? true : false;
- }
- bool half::operator>(const half& a) {
- float data_f = half2float(this->data_);
- float a_f = half2float(a.data_);
- return data_f > a_f ? true : false;
- }
- bool half::operator>=(const half& a) {
- float data_f = half2float(this->data_);
- float a_f = half2float(a.data_);
- return data_f >= a_f ? true : false;
- }
- bool half::operator==(const half& a) { return data_ == a.data_ ? true : false; }
- bool half::operator!=(const half& a) { return data_ != a.data_ ? true : false; }
- uint16_t half::float2half(const float f) {
- // assert((f > HALF_MIN) && (f < HALF_MAX));
- // assert((f == 0) || (f > HALF_PRECISION) || (f < -HALF_PRECISION));
- _bit32_u u;
- u.f = f;
- unsigned int bytes = u.i;
- unsigned char sign = (bytes >> 31) & 0x00000001;
- unsigned char exp = (bytes >> 23) & 0x000000FF;
- unsigned int eff = ((bytes >> 13) & 0x000003FF); // + ((bytes >> 12) & 0x00000001);
- if (exp == 0xFF) {
- // inf or nan
- exp = 0x1F;
- if (eff) {
- // nan -NaN +NaN
- return sign ? 0xFFFF : 0x7FFF;
- } else {
- // inf -inf +inf
- return sign ? 0xFC00 : 0x7C00;
- }
- } else if (exp == 0x00) {
- // zero or denormal
- if (eff) {
- // denormal
- return sign ? 0x8000 : 0x0000;
- } else {
- return sign ? 0x8000 : 0x0000;
- }
- } else if (exp - 0x7F >= 0x1F - 0x0F) {
- // +/- inf
- // inf -inf +inf
- return sign ? 0xFC00 : 0x7C00;
- } else if (exp - 0x7F <= 0x00 - 0x0F) {
- // denormal
- int shift = (0x7F - exp - 0x0E);
- shift = shift > 11 ? 11 : shift;
- return ((sign << 15) | ((0x0400 | eff) >> shift));
- } else {
- // normal number
- exp = ((exp - 0x7F) + 0x0F) & 0x1F;
- return (sign << 15) | (exp << 10) | eff;
- }
- }
- float half::half2float(const uint16_t f) {
- unsigned char sign = (f >> 15) & 0x01;
- unsigned char exp = (f >> 10) & 0x1F;
- unsigned int eff = f & 0x03FF;
- unsigned int result;
- if (exp == 0x1F) {
- // handle inf of nan
- if (eff) {
- // NaN
- result = sign ? 0xFFFFFFFF : 0x7FFFFFFF;
- } else {
- // +/- inf
- result = sign ? 0xFF800000 : 0x7F800000;
- }
- } else if (exp == 0x00) {
- if (eff) {
- // denormal
- unsigned int result_base;
- result = (sign << 31) | ((0x7F - 0x0E) << 23) | (eff << (23 - 10));
- // substruct the 1.xxxxxx in eff
- result_base = (sign << 31) | ((0x7F - 0x0E) << 23) | (0x00000000 << 13);
- _bit32_u u1, u2;
- u1.i = result;
- u2.i = result_base;
- return u1.f - u2.f;
- } else {
- // zero
- result = (sign << 31) | 0x00000000;
- }
- } else {
- // normal number
- exp = (exp - 0x0F) + 0x7F;
- result = (sign << 31) | (exp << 23) | (eff << (23 - 10));
- }
- _bit32_u u;
- u.i = result;
- return u.f;
- }
|