dmath.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769
  1. #include "pocketpy/common/dmath.h"
  2. #include "pocketpy/common/algorithm.h"
  3. #include "pocketpy/common/_log_spline_tbl.h"
  4. #include <stdint.h>
  5. union Float64Bits {
  6. double f;
  7. uint64_t i;
  8. };
  9. /* IEEE 754 double precision floating point data manipulation */
  10. typedef union
  11. {
  12. double f;
  13. uint64_t u;
  14. struct {int32_t i0,i1;} s;
  15. } udi_t;
  16. // https://github.com/akohlmey/fastermath/blob/master/src/exp.c#L63
  17. double dmath_exp2(double x) {
  18. if (x > 1000) return DMATH_INFINITY;
  19. if (x < -1000) return 0;
  20. if (dmath_isnan(x)) return DMATH_NAN;
  21. const int FM_DOUBLE_BIAS = 1023;
  22. static const double fm_exp2_q[] = {
  23. /* 1.00000000000000000000e0, */
  24. 2.33184211722314911771e2,
  25. 4.36821166879210612817e3
  26. };
  27. static const double fm_exp2_p[] = {
  28. 2.30933477057345225087e-2,
  29. 2.02020656693165307700e1,
  30. 1.51390680115615096133e3
  31. };
  32. double ipart, fpart, px, qx;
  33. udi_t epart;
  34. ipart = dmath_floor(x+0.5);
  35. fpart = x - ipart;
  36. // FM_DOUBLE_INIT_EXP(epart,ipart);
  37. epart.s.i0 = 0;
  38. epart.s.i1 = (((int) ipart) + FM_DOUBLE_BIAS) << 20;
  39. x = fpart*fpart;
  40. px = fm_exp2_p[0];
  41. px = px*x + fm_exp2_p[1];
  42. qx = x + fm_exp2_q[0];
  43. px = px*x + fm_exp2_p[2];
  44. qx = qx*x + fm_exp2_q[1];
  45. px = px * fpart;
  46. x = 1.0 + 2.0*(px/(qx-px));
  47. return epart.f*x;
  48. }
  49. double dmath_log2(double x) {
  50. if(x < 0) return DMATH_NAN;
  51. if(x == 0) return -DMATH_INFINITY;
  52. if(x == DMATH_INFINITY) return DMATH_INFINITY;
  53. if(dmath_isnan(x)) return DMATH_NAN;
  54. const double fm_log_dinv = 4.09600000000000000000e+03;
  55. const double fm_log_dsq6 = 9.93410746256510361521e-09;
  56. const int FM_DOUBLE_BIAS = 1023;
  57. const int FM_DOUBLE_EMASK = 2146435072;
  58. const int FM_DOUBLE_MBITS = 20;
  59. const int FM_DOUBLE_MMASK = 1048575;
  60. const int FM_DOUBLE_EZERO = 1072693248;
  61. const int FM_SPLINE_SHIFT = 8;
  62. udi_t val;
  63. double a,b,y;
  64. int32_t hx, ipart;
  65. val.f = x;
  66. hx = val.s.i1;
  67. /* extract exponent and subtract bias */
  68. ipart = (((hx & FM_DOUBLE_EMASK) >> FM_DOUBLE_MBITS) - FM_DOUBLE_BIAS);
  69. /* mask out exponent to get the prefactor to 2**ipart */
  70. hx &= FM_DOUBLE_MMASK;
  71. val.s.i1 = hx | FM_DOUBLE_EZERO;
  72. x = val.f;
  73. /* table index */
  74. hx >>= FM_SPLINE_SHIFT;
  75. /* compute x value matching table index */
  76. val.s.i0 = 0;
  77. val.s.i1 = FM_DOUBLE_EZERO | (hx << FM_SPLINE_SHIFT);
  78. b = (x - val.f) * fm_log_dinv;
  79. a = 1.0 - b;
  80. /* evaluate spline */
  81. y = a * fm_log_q1[hx] + b * fm_log_q1[hx+1];
  82. a = (a*a*a-a) * fm_log_q2[hx];
  83. b = (b*b*b-b) * fm_log_q2[hx+1];
  84. y += (a + b) * fm_log_dsq6;
  85. return ((double) ipart) + (y * DMATH_LOG2_E);
  86. }
  87. double dmath_exp(double x) {
  88. return dmath_exp2(x * DMATH_LOG2_E); // log2(e)
  89. }
  90. double dmath_exp10(double x) {
  91. return dmath_exp2(x * 3.321928094887362); // log2(10)
  92. }
  93. double dmath_log(double x) {
  94. return dmath_log2(x) / DMATH_LOG2_E; // log2(e)
  95. }
  96. double dmath_log10(double x) {
  97. return dmath_log2(x) / 3.321928094887362; // log2(10)
  98. }
  99. double dmath_pow(double base, double exp) {
  100. int64_t exp_int = (int64_t)exp;
  101. if(exp_int == exp) {
  102. if(exp_int == 0) return 1;
  103. if(exp_int < 0) {
  104. if(base == 0) return DMATH_NAN;
  105. base = 1 / base;
  106. exp_int = -exp_int;
  107. }
  108. double res = 1;
  109. while(exp_int > 0) {
  110. if(exp_int & 1) res *= base;
  111. base *= base;
  112. exp_int >>= 1;
  113. }
  114. return res;
  115. }
  116. if (base > 0) {
  117. if(base == 1.0) return 1.0;
  118. return dmath_exp(exp * dmath_log(base));
  119. }
  120. if (base == 0) {
  121. if (exp > 0) return 0;
  122. if (exp == 0) return 1;
  123. }
  124. return DMATH_NAN;
  125. }
  126. double dmath_sqrt(double x) {
  127. return dmath_pow(x, 0.5);
  128. }
  129. double dmath_cbrt(double x) {
  130. return dmath_pow(x, 1.0 / 3.0);
  131. }
  132. // https://github.com/kraj/musl/blob/kraj/master/src/math/sincos.c
  133. static double __sin(double x, double y, int iy)
  134. {
  135. static const double
  136. S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
  137. S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
  138. S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
  139. S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
  140. S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
  141. S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
  142. double z,r,v,w;
  143. z = x*x;
  144. w = z*z;
  145. r = S2 + z*(S3 + z*S4) + z*w*(S5 + z*S6);
  146. v = z*x;
  147. if (iy == 0)
  148. return x + v*(S1 + z*r);
  149. else
  150. return x - ((z*(0.5*y - v*r) - y) - v*S1);
  151. }
  152. static double __cos(double x, double y)
  153. {
  154. static const double
  155. C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
  156. C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
  157. C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
  158. C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
  159. C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
  160. C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
  161. double hz,z,r,w;
  162. z = x*x;
  163. w = z*z;
  164. r = z*(C1+z*(C2+z*C3)) + w*w*(C4+z*(C5+z*C6));
  165. hz = 0.5*z;
  166. w = 1.0-hz;
  167. return w + (((1.0-w)-hz) + (z*r-x*y));
  168. }
  169. int __rem_pio2(double x, double *y)
  170. {
  171. static const double
  172. toint = 1.5/2.22044604925031308085e-16,
  173. pio4 = 0x1.921fb54442d18p-1,
  174. invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
  175. pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
  176. pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
  177. pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
  178. pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
  179. pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
  180. pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
  181. union Float64Bits u = { .f = x };
  182. double z,w,t,r,fn;
  183. double tx[3],ty[2];
  184. uint32_t ix;
  185. int sign, n, ex, ey, i;
  186. sign = u.i>>63;
  187. ix = u.i>>32 & 0x7fffffff;
  188. if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
  189. if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
  190. goto medium; /* cancellation -- use medium case */
  191. if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
  192. if (!sign) {
  193. z = x - pio2_1; /* one round good to 85 bits */
  194. y[0] = z - pio2_1t;
  195. y[1] = (z-y[0]) - pio2_1t;
  196. return 1;
  197. } else {
  198. z = x + pio2_1;
  199. y[0] = z + pio2_1t;
  200. y[1] = (z-y[0]) + pio2_1t;
  201. return -1;
  202. }
  203. } else {
  204. if (!sign) {
  205. z = x - 2*pio2_1;
  206. y[0] = z - 2*pio2_1t;
  207. y[1] = (z-y[0]) - 2*pio2_1t;
  208. return 2;
  209. } else {
  210. z = x + 2*pio2_1;
  211. y[0] = z + 2*pio2_1t;
  212. y[1] = (z-y[0]) + 2*pio2_1t;
  213. return -2;
  214. }
  215. }
  216. }
  217. if (ix <= 0x401c463b) { /* |x| ~<= 9pi/4 */
  218. if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
  219. if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
  220. goto medium;
  221. if (!sign) {
  222. z = x - 3*pio2_1;
  223. y[0] = z - 3*pio2_1t;
  224. y[1] = (z-y[0]) - 3*pio2_1t;
  225. return 3;
  226. } else {
  227. z = x + 3*pio2_1;
  228. y[0] = z + 3*pio2_1t;
  229. y[1] = (z-y[0]) + 3*pio2_1t;
  230. return -3;
  231. }
  232. } else {
  233. if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
  234. goto medium;
  235. if (!sign) {
  236. z = x - 4*pio2_1;
  237. y[0] = z - 4*pio2_1t;
  238. y[1] = (z-y[0]) - 4*pio2_1t;
  239. return 4;
  240. } else {
  241. z = x + 4*pio2_1;
  242. y[0] = z + 4*pio2_1t;
  243. y[1] = (z-y[0]) + 4*pio2_1t;
  244. return -4;
  245. }
  246. }
  247. }
  248. if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
  249. medium:
  250. /* rint(x/(pi/2)) */
  251. fn = (double)x*invpio2 + toint - toint;
  252. n = (int32_t)fn;
  253. r = x - fn*pio2_1;
  254. w = fn*pio2_1t; /* 1st round, good to 85 bits */
  255. /* Matters with directed rounding. */
  256. if ((r - w < -pio4)) {
  257. n--;
  258. fn--;
  259. r = x - fn*pio2_1;
  260. w = fn*pio2_1t;
  261. } else if ((r - w > pio4)) {
  262. n++;
  263. fn++;
  264. r = x - fn*pio2_1;
  265. w = fn*pio2_1t;
  266. }
  267. y[0] = r - w;
  268. u.f = y[0];
  269. ey = u.i>>52 & 0x7ff;
  270. ex = ix>>20;
  271. if (ex - ey > 16) { /* 2nd round, good to 118 bits */
  272. t = r;
  273. w = fn*pio2_2;
  274. r = t - w;
  275. w = fn*pio2_2t - ((t-r)-w);
  276. y[0] = r - w;
  277. u.f = y[0];
  278. ey = u.i>>52 & 0x7ff;
  279. if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
  280. t = r;
  281. w = fn*pio2_3;
  282. r = t - w;
  283. w = fn*pio2_3t - ((t-r)-w);
  284. y[0] = r - w;
  285. }
  286. }
  287. y[1] = (r - y[0]) - w;
  288. return n;
  289. }
  290. (void)tx;
  291. (void)ty;
  292. (void)i;
  293. return 0;
  294. #if 0
  295. /*
  296. * all other (large) arguments
  297. */
  298. if (ix >= 0x7ff00000) { /* x is inf or NaN */
  299. y[0] = y[1] = x - x;
  300. return 0;
  301. }
  302. /* set z = scalbn(|x|,-ilogb(x)+23) */
  303. u.f = x;
  304. u.i &= (uint64_t)-1>>12;
  305. u.i |= (uint64_t)(0x3ff + 23)<<52;
  306. z = u.f;
  307. for (i=0; i < 2; i++) {
  308. tx[i] = (double)(int32_t)z;
  309. z = (z-tx[i])*0x1p24;
  310. }
  311. tx[i] = z;
  312. /* skip zero terms, first term is non-zero */
  313. while (tx[i] == 0.0)
  314. i--;
  315. n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1);
  316. if (sign) {
  317. y[0] = -ty[0];
  318. y[1] = -ty[1];
  319. return -n;
  320. }
  321. y[0] = ty[0];
  322. y[1] = ty[1];
  323. return n;
  324. #endif
  325. }
  326. void dmath_sincos(double x, double *sin, double *cos) {
  327. double y[2], s, c;
  328. uint32_t ix;
  329. unsigned n;
  330. //GET_HIGH_WORD(ix, x);
  331. union Float64Bits u = { .f = x };
  332. ix = (uint32_t)(u.i >> 32);
  333. ix &= 0x7fffffff;
  334. /* |x| ~< pi/4 */
  335. if (ix <= 0x3fe921fb) {
  336. /* if |x| < 2**-27 * sqrt(2) */
  337. if (ix < 0x3e46a09e) {
  338. /* raise inexact if x!=0 and underflow if subnormal */
  339. // FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
  340. volatile double y_force_eval;
  341. y_force_eval = ix < 0x00100000 ? x/0x1p120f : x+0x1p120f;
  342. (void)y_force_eval;
  343. *sin = x;
  344. *cos = 1.0;
  345. return;
  346. }
  347. *sin = __sin(x, 0.0, 0);
  348. *cos = __cos(x, 0.0);
  349. return;
  350. }
  351. /* sincos(Inf or NaN) is NaN */
  352. if (ix >= 0x7ff00000) {
  353. *sin = *cos = x - x;
  354. return;
  355. }
  356. /* argument reduction needed */
  357. n = __rem_pio2(x, y);
  358. s = __sin(y[0], y[1], 1);
  359. c = __cos(y[0], y[1]);
  360. switch (n&3) {
  361. case 0:
  362. *sin = s;
  363. *cos = c;
  364. break;
  365. case 1:
  366. *sin = c;
  367. *cos = -s;
  368. break;
  369. case 2:
  370. *sin = -s;
  371. *cos = -c;
  372. break;
  373. case 3:
  374. default:
  375. *sin = -c;
  376. *cos = s;
  377. break;
  378. }
  379. }
  380. double dmath_sin(double x) {
  381. double s, c;
  382. dmath_sincos(x, &s, &c);
  383. return s;
  384. }
  385. double dmath_cos(double x) {
  386. double s, c;
  387. dmath_sincos(x, &s, &c);
  388. return c;
  389. }
  390. double dmath_tan(double x) {
  391. double s, c;
  392. dmath_sincos(x, &s, &c);
  393. return s / c;
  394. }
  395. static double zig_r64(double z) {
  396. const double pS0 = 1.66666666666666657415e-01;
  397. const double pS1 = -3.25565818622400915405e-01;
  398. const double pS2 = 2.01212532134862925881e-01;
  399. const double pS3 = -4.00555345006794114027e-02;
  400. const double pS4 = 7.91534994289814532176e-04;
  401. const double pS5 = 3.47933107596021167570e-05;
  402. const double qS1 = -2.40339491173441421878e+00;
  403. const double qS2 = 2.02094576023350569471e+00;
  404. const double qS3 = -6.88283971605453293030e-01;
  405. const double qS4 = 7.70381505559019352791e-02;
  406. double p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
  407. double q = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
  408. return p / q;
  409. }
  410. // https://github.com/ziglang/zig/blob/master/lib/std/math/asin.zig
  411. double dmath_asin(double x) {
  412. if(!(x >= -1 && x <= 1)) return DMATH_NAN;
  413. const double pio2_hi = 1.57079632679489655800e+00;
  414. const double pio2_lo = 6.12323399573676603587e-17;
  415. union Float64Bits ux_union;
  416. ux_union.f = x;
  417. uint64_t ux = ux_union.i;
  418. uint32_t hx = (uint32_t)(ux >> 32);
  419. uint32_t ix = hx & 0x7FFFFFFF;
  420. /* |x| >= 1 or nan */
  421. if (ix >= 0x3FF00000) {
  422. uint32_t lx = (uint32_t)(ux & 0xFFFFFFFF);
  423. /* asin(1) = +-pi/2 with inexact */
  424. if (((ix - 0x3FF00000) | lx) == 0) {
  425. return x * pio2_hi + 0x1.0p-120;
  426. } else {
  427. return DMATH_NAN;
  428. }
  429. }
  430. /* |x| < 0.5 */
  431. if (ix < 0x3FE00000) {
  432. /* if 0x1p-1022 <= |x| < 0x1p-26 avoid raising overflow */
  433. if (ix < 0x3E500000 && ix >= 0x00100000) {
  434. return x;
  435. } else {
  436. return x + x * zig_r64(x * x);
  437. }
  438. }
  439. /* 1 > |x| >= 0.5 */
  440. double z = (1 - dmath_fabs(x)) * 0.5;
  441. double s = dmath_sqrt(z);
  442. double r = zig_r64(z);
  443. double fx;
  444. /* |x| > 0.975 */
  445. if (ix >= 0x3FEF3333) {
  446. fx = pio2_hi - 2 * (s + s * r);
  447. } else {
  448. union Float64Bits jx_union = { .f = s };
  449. uint64_t jx = jx_union.i;
  450. union Float64Bits df_union = { .i = jx & 0xFFFFFFFF00000000ULL };
  451. double df = df_union.f;
  452. double c = (z - df * df) / (s + df);
  453. fx = 0.5 * pio2_hi - (2 * s * r - (pio2_lo - 2 * c) - (0.5 * pio2_hi - 2 * df));
  454. }
  455. if (hx >> 31 != 0) {
  456. return -fx;
  457. } else {
  458. return fx;
  459. }
  460. }
  461. double dmath_acos(double x) {
  462. return DMATH_PI / 2 - dmath_asin(x);
  463. }
  464. double dmath_atan(double x) {
  465. return dmath_asin(x / dmath_sqrt(1 + x * x));
  466. }
  467. double dmath_atan2(double y, double x)
  468. {
  469. const double
  470. pi = 3.1415926535897931160E+00, /* 0x400921FB, 0x54442D18 */
  471. pi_lo = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */
  472. double z;
  473. uint32_t m,lx,ly,ix,iy;
  474. if (dmath_isnan(x) || dmath_isnan(y))
  475. return x+y;
  476. // EXTRACT_WORDS(ix, lx, x);
  477. // EXTRACT_WORDS(iy, ly, y);
  478. union Float64Bits ux = { .f = x }, uy = { .f = y };
  479. ix = (uint32_t)(ux.i >> 32);
  480. iy = (uint32_t)(uy.i >> 32);
  481. lx = (uint32_t)(ux.i & 0xFFFFFFFF);
  482. ly = (uint32_t)(uy.i & 0xFFFFFFFF);
  483. if ((ix-0x3ff00000 | lx) == 0) /* x = 1.0 */
  484. return dmath_atan(y);
  485. m = ((iy>>31)&1) | ((ix>>30)&2); /* 2*sign(x)+sign(y) */
  486. ix = ix & 0x7fffffff;
  487. iy = iy & 0x7fffffff;
  488. /* when y = 0 */
  489. if ((iy|ly) == 0) {
  490. switch(m) {
  491. case 0:
  492. case 1: return y; /* atan(+-0,+anything)=+-0 */
  493. case 2: return pi; /* atan(+0,-anything) = pi */
  494. case 3: return -pi; /* atan(-0,-anything) =-pi */
  495. }
  496. }
  497. /* when x = 0 */
  498. if ((ix|lx) == 0)
  499. return m&1 ? -pi/2 : pi/2;
  500. /* when x is INF */
  501. if (ix == 0x7ff00000) {
  502. if (iy == 0x7ff00000) {
  503. switch(m) {
  504. case 0: return pi/4; /* atan(+INF,+INF) */
  505. case 1: return -pi/4; /* atan(-INF,+INF) */
  506. case 2: return 3*pi/4; /* atan(+INF,-INF) */
  507. case 3: return -3*pi/4; /* atan(-INF,-INF) */
  508. }
  509. } else {
  510. switch(m) {
  511. case 0: return 0.0; /* atan(+...,+INF) */
  512. case 1: return -0.0; /* atan(-...,+INF) */
  513. case 2: return pi; /* atan(+...,-INF) */
  514. case 3: return -pi; /* atan(-...,-INF) */
  515. }
  516. }
  517. }
  518. /* |y/x| > 0x1p64 */
  519. if (ix+(64<<20) < iy || iy == 0x7ff00000)
  520. return m&1 ? -pi/2 : pi/2;
  521. /* z = atan(|y/x|) without spurious underflow */
  522. if ((m&2) && iy+(64<<20) < ix) /* |y/x| < 0x1p-64, x<0 */
  523. z = 0;
  524. else
  525. z = dmath_atan(dmath_fabs(y/x));
  526. switch (m) {
  527. case 0: return z; /* atan(+,+) */
  528. case 1: return -z; /* atan(-,+) */
  529. case 2: return pi - (z-pi_lo); /* atan(+,-) */
  530. default: /* case 3 */
  531. return (z-pi_lo) - pi; /* atan(-,-) */
  532. }
  533. }
  534. ////////////////////////////////////////////////////////////////////
  535. int dmath_isinf(double x) {
  536. union Float64Bits u = { .f = x };
  537. return (u.i & -1ULL>>1) == 0x7ffULL<<52;
  538. }
  539. int dmath_isnan(double x) {
  540. union Float64Bits u = { .f = x };
  541. return (u.i & -1ULL>>1) > 0x7ffULL<<52;
  542. }
  543. int dmath_isnormal(double x) {
  544. union Float64Bits u = { .f = x };
  545. return ((u.i+(1ULL<<52)) & -1ULL>>1) >= 1ULL<<53;
  546. }
  547. int dmath_isfinite(double x) {
  548. union Float64Bits u = { .f = x };
  549. return (u.i & -1ULL>>1) < 0x7ffULL<<52;
  550. }
  551. // https://github.com/kraj/musl/blob/kraj/master/src/math/fmod.c
  552. double dmath_fmod(double x, double y) {
  553. if(y == 0) return DMATH_NAN;
  554. union Float64Bits ux = { .f = x }, uy = { .f = y };
  555. int ex = ux.i>>52 & 0x7ff;
  556. int ey = uy.i>>52 & 0x7ff;
  557. int sx = ux.i>>63;
  558. uint64_t i;
  559. /* in the followings uxi should be ux.i, but then gcc wrongly adds */
  560. /* float load/store to inner loops ruining performance and code size */
  561. uint64_t uxi = ux.i;
  562. if (uy.i<<1 == 0 || dmath_isnan(y) || ex == 0x7ff)
  563. return (x*y)/(x*y);
  564. if (uxi<<1 <= uy.i<<1) {
  565. if (uxi<<1 == uy.i<<1)
  566. return 0*x;
  567. return x;
  568. }
  569. /* normalize x and y */
  570. if (!ex) {
  571. for (i = uxi<<12; i>>63 == 0; ex--, i <<= 1);
  572. uxi <<= -ex + 1;
  573. } else {
  574. uxi &= -1ULL >> 12;
  575. uxi |= 1ULL << 52;
  576. }
  577. if (!ey) {
  578. for (i = uy.i<<12; i>>63 == 0; ey--, i <<= 1);
  579. uy.i <<= -ey + 1;
  580. } else {
  581. uy.i &= -1ULL >> 12;
  582. uy.i |= 1ULL << 52;
  583. }
  584. /* x mod y */
  585. for (; ex > ey; ex--) {
  586. i = uxi - uy.i;
  587. if (i >> 63 == 0) {
  588. if (i == 0)
  589. return 0*x;
  590. uxi = i;
  591. }
  592. uxi <<= 1;
  593. }
  594. i = uxi - uy.i;
  595. if (i >> 63 == 0) {
  596. if (i == 0)
  597. return 0*x;
  598. uxi = i;
  599. }
  600. for (; uxi>>52 == 0; uxi <<= 1, ex--);
  601. /* scale result */
  602. if (ex > 0) {
  603. uxi -= 1ULL << 52;
  604. uxi |= (uint64_t)ex << 52;
  605. } else {
  606. uxi >>= -ex + 1;
  607. }
  608. uxi |= (uint64_t)sx << 63;
  609. ux.i = uxi;
  610. return ux.f;
  611. }
  612. // https://github.com/kraj/musl/blob/kraj/master/src/math/copysign.c
  613. double dmath_copysign(double x, double y) {
  614. union Float64Bits ux = { .f = x }, uy = { .f = y };
  615. ux.i &= -1ULL/2;
  616. ux.i |= uy.i & 1ULL<<63;
  617. return ux.f;
  618. }
  619. double dmath_fabs(double x) {
  620. return (x < 0) ? -x : x;
  621. }
  622. double dmath_ceil(double x) {
  623. if(!dmath_isfinite(x)) return x;
  624. int64_t int_part = (int64_t)x;
  625. if (x > 0 && x != (double)int_part) {
  626. return (double)(int_part + 1);
  627. }
  628. return (double)int_part;
  629. }
  630. double dmath_floor(double x) {
  631. if(!dmath_isfinite(x)) return x;
  632. int64_t int_part = (int64_t)x;
  633. if (x < 0 && x != (double)int_part) {
  634. return (double)(int_part - 1);
  635. }
  636. return (double)int_part;
  637. }
  638. double dmath_trunc(double x) {
  639. return (double)((int64_t)x);
  640. }
  641. // https://github.com/kraj/musl/blob/kraj/master/src/math/modf.c
  642. double dmath_modf(double x, double* iptr) {
  643. union Float64Bits u = { .f = x };
  644. uint64_t mask;
  645. int e = (int)(u.i>>52 & 0x7ff) - 0x3ff;
  646. /* no fractional part */
  647. if (e >= 52) {
  648. *iptr = x;
  649. if (e == 0x400 && u.i<<12 != 0) /* nan */
  650. return x;
  651. u.i &= 1ULL<<63;
  652. return u.f;
  653. }
  654. /* no integral part*/
  655. if (e < 0) {
  656. u.i &= 1ULL<<63;
  657. *iptr = u.f;
  658. return x;
  659. }
  660. mask = -1ULL>>12>>e;
  661. if ((u.i & mask) == 0) {
  662. *iptr = x;
  663. u.i &= 1ULL<<63;
  664. return u.f;
  665. }
  666. u.i &= ~mask;
  667. *iptr = u.f;
  668. return x - u.f;
  669. }
  670. double dmath_fmin(double x, double y) {
  671. return (x < y) ? x : y;
  672. }
  673. double dmath_fmax(double x, double y) {
  674. return (x > y) ? x : y;
  675. }